Verwendete Pakete
# Chunk options: show code, suppress messages and warnings in the rendered output.
# Use TRUE/FALSE rather than T/F (T and F are ordinary, reassignable variables).
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
# Packages used throughout the analysis
library(rio)
library(tidyverse)    # dplyr/tidyr/ggplot2 pipelines below
library(lubridate)    # hm(), hour(), minute() for lesson-time variables
library(ggstance)
library(mice)         # multiple imputation (imp45 / imp90)
library(miceadds)
library(naniar)
library(psych)        # describeBy() for the descriptive tables
library(ggrepel)
library(BayesFactor)  # contingencyTableBF(), correlationBF()
library(bindata)      # rmvbin() for the binary-data simulation
library(pander)       # markdown table rendering
library(corrgram)     # multicollinearity check
library(hrbrthemes)   # theme_ipsum_rc()
library(MASS)         # mvrnorm() for the t-test simulation
library(bain)         # informative hypothesis testing
library(plotly)
library(DescTools)
R-Version
## $platform
## [1] "x86_64-pc-linux-gnu"
##
## $arch
## [1] "x86_64"
##
## $os
## [1] "linux-gnu"
##
## $system
## [1] "x86_64, linux-gnu"
##
## $status
## [1] ""
##
## $major
## [1] "3"
##
## $minor
## [1] "6.2"
##
## $year
## [1] "2019"
##
## $month
## [1] "12"
##
## $day
## [1] "12"
##
## $`svn rev`
## [1] "77560"
##
## $language
## [1] "R"
##
## $version.string
## [1] "R version 3.6.2 (2019-12-12)"
##
## $nickname
## [1] "Dark and Stormy Night"
Wir können die Daten aktuell leider nicht teilen. Bei konkreten Anfragen, bitte Mail an Autor*innen (klick auf Namen zu Dokumentbeginn).
# Remove the example codings, then turn the running number into a numeric id
gs <- dplyr::filter(gs, `Laufende\nNummer` != "Beispiel")
gs <- mutate(gs, id = as.numeric(`Laufende\nNummer`))
## Correction of data based on observed deviations
# Miscalculations of `Min Beginn seit Std-Anfang` (id == 189, 248, 329) are skipped,
# because the self-computed variable `beg_hw_min` is used instead
# (due to the different time zone, one extra hour would additionally have to be added).
# All time stamps below were typos; each was re-checked in the questionnaire (FB).
ts_fixes <- data.frame(
  id  = c(105, 105, 120, 168, 250, 187, 187, 187, 187, 307, 315),
  col = c("Uhrzeit Beginn HA Vergabe", "Uhrzeit Ende Vergabe",
          "Uhrzeit Beginn HA Vergabe", "Uhrzeit Beginn HA Vergabe",
          "Uhrzeit Ende Vergabe", "Uhrzeit Beginn der Std laut Plan",
          "Uhrzeit Ende der Std laut Plan", "Uhrzeit Beginn HA Vergabe",
          "Uhrzeit Ende Vergabe", "Uhrzeit Ende Vergabe",
          "Uhrzeit Ende Vergabe"),
  val = c("1899-12-31 11:48:00", "1899-12-31 11:51:00", "1899-12-31 11:58:00",
          "1899-12-31 11:43:00", "1899-12-31 12:33:00", "1899-12-31 09:20:00",
          "1899-12-31 10:50:00", "1899-12-31 10:24:00", "1899-12-31 10:40:00",
          "1899-12-31 10:16:00", "1899-12-31 11:55:00"),
  stringsAsFactors = FALSE
)
for (k in seq_len(nrow(ts_fixes))) {
  gs[which(gs$id == ts_fixes$id[k]), ts_fixes$col[k]] <- ts_fixes$val[k]
}
gs[which(gs$id == 220), "Ankündigung"] <- 1 # typo, re-checked in questionnaire
gs[which(gs$id == 198), "L schreibt"] <- 1 # typo, re-checked in questionnaire
gs[which(gs$id == 330), "L will Notation"] <- NA # value "9" does not exist; no plausible replacement identifiable
# Helper: parse an Excel-style datetime string and keep only the hh:mm part as a period
parse_hm <- function(x) {
  hm(format(strptime(x, "%Y-%m-%d %H:%M:%S"), "%H:%M"))
}
# Helper: difference between two hh:mm periods, expressed in minutes
diff_min <- function(later, earlier) {
  lubridate::hour(later - earlier) * 60 + lubridate::minute(later - earlier)
}
# New variables for the gs data set
gs <- gs %>%
  mutate(beg_plan = parse_hm(`Uhrzeit Beginn der Std laut Plan`), # planned lesson start
         end_plan = parse_hm(`Uhrzeit Ende der Std laut Plan`),   # planned lesson end
         beg_act  = parse_hm(`Uhrzeit Realer Beginn der Std`),    # actual lesson start
         beg_hw   = parse_hm(`Uhrzeit Beginn HA Vergabe`),        # start of homework assignment
         end_hw   = parse_hm(`Uhrzeit Ende Vergabe`),             # end of homework assignment
         stunde     = diff_min(end_plan, beg_plan), # planned length of the lesson (minutes)
         beg_hw_min = diff_min(beg_hw, beg_plan),   # homework start relative to PLANNED lesson start
         dau_hw_min = diff_min(end_hw, beg_hw),     # duration of the homework assignment
         # short names for the teacher (lh_*) and student (sh_*) behaviour indicators
         lh_ank = `Ankündigung`,
         lh_auf = `L verlangt Aufmerk`,
         lh_nen = `L nennt`,
         lh_sch = `L schreibt`,
         lh_erl = `L erläutert`,
         lh_wfr = `L will Fragen`,
         lh_bfr = `L beantwortet`,
         lh_wno = `L will Notation`,
         lh_ano = `L achtet Notat`,
         sh_auf = `S aufmerksam`,
         sh_mel = `S melden`,
         sh_fra = `S fragen`,
         sh_not = `S notieren`)
gym <- gym %>%
  mutate(id = as.numeric(`Laufende\nNummer`)) # running number as id
# Corrections: `L schreibt` was mistyped for these lessons; re-checked in the questionnaire (FB)
schreibt_fix_ids <- c(141, 148, 149, 150, 164, 175, 180, 181, 182, 188, 189)
gym[which(gym$id %in% schreibt_fix_ids), "L schreibt"] <- 1
# New variables for the gym data set (harmonised with the gs variable names)
gym <- gym %>%
  mutate(beg_hw_min = `Min Beginn`,       # homework start relative to lesson start
         dau_hw_min = `HA stellen Dauer`, # duration of the homework assignment
         stunde = case_when(Doppelstundenmodell == 0 ~ 45,
                            Doppelstundenmodell == 1 ~ 90), # lesson length (minutes)
         Schulart = "Gymnasium",
         lh_ank = `Ankündigung`,
         lh_auf = `L sorg Aufmerk`,
         lh_nen = `L nennt`,
         lh_sch = `L schreibt`,
         lh_erl = `Erläuterung gesamt`,
         lh_wfr = `L will Fragen`,
         lh_bfr = `L beantwortet`,
         lh_wno = `L will Notation`,
         lh_ano = `L achtet Notat`,
         sh_auf = `S aufmerksam`,
         sh_mel = `S melden`,
         sh_fra = `Fragen gesamt`,
         sh_not = `S notieren`)
# Keep the same analysis variables in both data sets; defined once so the two
# selections cannot drift apart
select_analysis_vars <- function(df) {
  dplyr::select(df, beg_hw_min, dau_hw_min, stunde, Schulart, Klassenstufe,
                lh_ank, lh_auf, lh_nen, lh_sch, lh_erl, lh_wfr, lh_bfr,
                lh_wno, lh_ano, sh_auf, sh_mel, sh_fra, sh_not)
}
gs_p  <- select_analysis_vars(gs)
gym_p <- select_analysis_vars(gym)
p_data <- rbind(gs_p, gym_p)
# Filter away the 10 cross-grade classes as they are difficult to analyze
# (they combine characteristics of two grades in one class)
p_data <- p_data %>%
  dplyr::filter(Klassenstufe != "1_2" | is.na(Klassenstufe)) %>%
  mutate(Klassenstufe = as.numeric(Klassenstufe))
Skalenniveaus
fehlende Daten im Datensatz
Kombinationen der fehlenden Daten
bei der Imputation
imputation model
für jede Variable
# Schulart as factor plus a numeric dummy (0 = Grundschule, 1 = Gymnasium)
p_data$Schulart <- as.factor(p_data$Schulart)
p_data$Schulart_n <- ifelse(p_data$Schulart == "Grundschule", 0, 1)
p_data$Klassenstufe <- as.numeric(p_data$Klassenstufe)
# Overview table: scale type and imputation method for each variable
tab_vars <- c("beg_hw_min", "dau_hw_min", "stunde", "Schulart",
              "Klassenstufe", "lh_ank", "lh_auf", "lh_nen", "lh_sch",
              "lh_erl", "lh_wfr", "lh_bfr", "lh_wno", "lh_ano", "sh_auf",
              "sh_mel", "sh_fra", "sh_not", "Schulart_n")
tab_scales <- c("metric", "metric", "metric", "nominal",
                "metric", "binary", "binary", "binary", "binary",
                "binary", "binary", "metric", "binary", "binary", "metric",
                "metric", "metric", "metric", "metric")
tab_methods <- c("pmm", "pmm", "pmm", "polyreg",
                 "pmm", "logreg", "logreg", "logreg", "logreg",
                 "logreg", "logreg", "pmm", "logreg", "logreg", "pmm",
                 "pmm", "pmm", "pmm", "~ifelse(Schulart == 'Grundschule', 0, 1)")
model_tab <- data.frame(variable = tab_vars,
                        'scale type' = tab_scales,
                        method = tab_methods)
pander(model_tab)
| variable | scale.type | method |
|---|---|---|
| beg_hw_min | metric | pmm |
| dau_hw_min | metric | pmm |
| stunde | metric | pmm |
| Schulart | nominal | polyreg |
| Klassenstufe | metric | pmm |
| lh_ank | binary | logreg |
| lh_auf | binary | logreg |
| lh_nen | binary | logreg |
| lh_sch | binary | logreg |
| lh_erl | binary | logreg |
| lh_wfr | binary | logreg |
| lh_bfr | metric | pmm |
| lh_wno | binary | logreg |
| lh_ano | binary | logreg |
| sh_auf | metric | pmm |
| sh_mel | metric | pmm |
| sh_fra | metric | pmm |
| sh_not | metric | pmm |
| Schulart_n | metric | ~ifelse(Schulart == ‘Grundschule’, 0, 1) |
Defining methods.
# Defining the imputation method for every variable
# (same order as the columns of p_data / the rows of model_tab)
meth <- c(
  "pmm",     # beg_hw_min
  "pmm",     # dau_hw_min
  "pmm",     # stunde
  "polyreg", # Schulart
  "pmm",     # Klassenstufe
  "logreg",  # lh_ank
  "logreg",  # lh_auf
  "logreg",  # lh_nen
  "logreg",  # lh_sch
  "logreg",  # lh_erl
  "logreg",  # lh_wfr
  "pmm",     # lh_bfr
  "logreg",  # lh_wno
  "logreg",  # lh_ano
  "pmm",     # sh_auf
  "pmm",     # sh_mel
  "pmm",     # sh_fra
  "pmm",     # sh_not
  "~ifelse(Schulart == 'Grundschule', 0, 1)" # Schulart_n: deterministic function of Schulart
)
number of imputations
1000
selection of predictors
Auf Multikollinearität überprüfen:
# cor(y = p_data[,-4], x = !is.na(p_data[,-4]), use = "pair")
# Visual multicollinearity check of the analysis variables
corrgram(p_data, lower.panel = "panel.pie", upper.panel = "panel.cor")
Drei Variablen werden als Prädiktoren ausgeschlossen: stunde, lh_bfr, Schulart_n.
# Set predictor specifications: dry run (maxit = 0) only to obtain
# the default predictor matrix from mice
ini <- mice(p_data, maxit = 0, m = m, meth = meth, seed = 666)
pred <- ini$predictorMatrix
# Drop three predictors from every imputation model:
pred[, "stunde"]     <- 0 # stunde highly correlated with beg_hw_min
pred[, "lh_bfr"]     <- 0 # lh_bfr highly correlated with sh_fra
pred[, "Schulart_n"] <- 0 # Schulart_n is just the numeric version of Schulart
variables that are function of other variables
Schulart_n ist eine numerische Version von Schulart, siehe Imputationsmethode.
which variables to impute
Alle.
number of iterations
20
plausible values
Dieser Code wird je Imputation (1000) einen Plot generieren. Falls interessiert, kann dies auskommentiert werden, ansonsten spare ich den Platz.
All values seem plausible.
check convergence
Bayes Factor Design Analysis zur Bestimmung des BF Thresholds.
Aufgrund fehlender Referenzwerte nehmen wir eine Effektstärke von \(\varphi=.2\) an. Diese liegt zwischen einem kleinen (\(\varphi=.1\)) und einem mittleren (\(\varphi=.3\)) Effekt nach Cohen (1988).
Es werden 1000 Simulationen durchgeführt. Aufgrund der Robustheit der Ergebnisse in diesem Bereich, verzichteten wir auf eine größere Anzahl an Simulationen.
# Simulate contingency-table Bayes factors under both hypotheses.
# phi[1] is the correlation under H0, phi[2] the one under H1 (defined above).
sim_cor_results <- data.frame() # results collector
for (j in true_hyp) { # loop over both hypotheses
  # correlation matrix of the two binary variables under the current hypothesis
  true_phi <- if (j == "H0 is true") phi[1] else phi[2]
  bincorr <- matrix(c(1, true_phi, true_phi, 1), ncol = 2)
  for (n in seq_len(n_sim)) { # loop over all simulations
    sim_df <- rmvbin(n = 510, # fixed N of 510
                     margprob = c(0.5, 0.5),
                     bincorr = bincorr)
    sim_imp_m <- matrix(table(sim_df[, 1], sim_df[, 2]), 2, 2) # 2x2 contingency table
    sim_fit <- contingencyTableBF(sim_imp_m, sampleType = "indepMulti", fixedMargin = "cols")
    # H1 results are stored in the rows after the H0 block
    idx <- n + if (j == "H0 is true") 0 else n_sim
    sim_cor_results[idx, "BayesFactor"] <- extractBF(sim_fit)$bf
    sim_cor_results[idx, "trueH"] <- j
    rm(sim_imp_m, sim_fit)
  }
}
# Classify a vector of BFs as correct / incorrect / inconclusive, given the true
# hypothesis and a symmetric threshold thr:
# BF >= thr counts as evidence for H1, BF <= 1/thr as evidence for H0,
# anything in between is inconclusive.
classify_bf <- function(bf, true_h, thr) {
  case_when(
    bf >= thr & true_h == "H0 is true" ~ "incorrect",
    bf < thr & bf > (1 / thr) & true_h == "H0 is true" ~ "inconclusive",
    bf <= (1 / thr) & true_h == "H0 is true" ~ "correct",
    bf >= thr & true_h == "H1 is true" ~ "correct",
    bf < thr & bf > (1 / thr) & true_h == "H1 is true" ~ "inconclusive",
    bf <= (1 / thr) & true_h == "H1 is true" ~ "incorrect"
  )
}
# categorize the decision for the three candidate thresholds
sim_cor_results <- sim_cor_results %>%
  mutate(BF3  = classify_bf(BayesFactor, trueH, 3),
         BF5  = classify_bf(BayesFactor, trueH, 5),
         BF10 = classify_bf(BayesFactor, trueH, 10))
# Long format for plotting: one row per (simulation, threshold) pair
sim_cor_results_l <- sim_cor_results %>%
  pivot_longer(cols = 3:5, names_to = "BF Threshold", values_to = "decision")
# fix the display order of the threshold facets
sim_cor_results_l$`BF Threshold` <- factor(sim_cor_results_l$`BF Threshold`,
                                           levels = c("BF3", "BF5", "BF10"))
# hrbrthemes::import_roboto_condensed()
# Stacked-proportion bars of the decisions per true hypothesis,
# one facet per BF threshold; labels show rounded percentages.
ggplot(sim_cor_results_l, aes(trueH, fill = decision)) +
geom_bar(position = "fill") +
# percentage labels, centred within each stack segment
geom_text(aes(label=round(..count../n_sim*100), y= ..count../n_sim),
position =position_stack(vjust = 0.5), stat= "count",
color = "white", size = 5) +
coord_flip() +
facet_wrap(~`BF Threshold`, ncol = 1) +
labs(title = "Results of the Bayes Factor Design Analysis",
subtitle = "For three different Bayes Factors",
caption = paste("In % (rounded), based on", n_sim, "simulations")) +
xlab("True Hypothesis") +
scale_fill_viridis_d() +
theme_ipsum_rc()
Die Ergebnisse zeigen, dass bei einem BF von 3 kaum falsch-positive Ergebnisse zustande kommen (~1%) und die Power jeweils zufriedenstellend bzw. sehr hoch ist. Es ist somit nicht nötig, einen höheren BF zu verwenden, um falsch-positive Ergebnisse zu vermeiden. Höhere BFs hätten zudem den Nachteil, dass vermehrt inkonklusive Ergebnisse auftreten. Wir verwenden bei der Auswertung der binären Zusammenhänge (Kreuztabellen) somit einen Threshold von \(BF=3\) bzw. \(BF=\frac{1}{3}\).
Bayes Factor Design Analysis zur Bestimmung des BF Thresholds.
Aufgrund fehlender Referenzwerte nehmen wir eine Effektstärke von \(d=.35\) an. Diese liegt zwischen einem kleinen (\(d=.2\)) und einem mittleren (\(d=.5\)) Effekt nach Cohen (1988).
Es werden 1000 Simulationen durchgeführt. Aufgrund der Robustheit der Ergebnisse in diesem Bereich, verzichteten wir auf eine größere Anzahl an Simulationen.
# Simulate two-group Bayes factors (bain) under both hypotheses.
# true_d[1] is the mean difference under H0, true_d[2] the one under H1.
sim_ttest_results <- data.frame() # results collector
for (j in true_hyp) { # loop over both hypotheses
  # group means under the current hypothesis (first group is always centred at 0)
  mu <- if (j == "H0 is true") c(0, true_d[1]) else c(0, true_d[2])
  for (n in seq_len(n_sim)) { # loop over all simulations
    sim_ttest_df <- data.frame(mvrnorm(n = 510, # fixed n of 510
                                       mu = mu,
                                       Sigma = matrix(c( 1, .5, # vcov matrix
                                                         .5, 1),
                                                      2, 2)))
    # pivot longer to insert it into a lm
    sim_ttest_df_l <- pivot_longer(sim_ttest_df, 1:2, names_to = "group", values_to = "dependentVar")
    ### LINEAR MODEL ############################################ #
    # cell-means model: one coefficient per group (no intercept)
    sim_fit_ttest <- lm(dependentVar ~ group-1,
                        data = sim_ttest_df_l)
    ### BAIN #################################################### #
    # informative hypotheses: equality (H1) and directed difference (H2)
    hypotheses <- "groupX1 = groupX2; groupX1 < groupX2" #H1 and H2 respectively
    bf_ttest <- bain(sim_fit_ttest, hypothesis = hypotheses)
    # H1 results are stored in the rows after the H0 block
    idx <- n + if (j == "H0 is true") 0 else n_sim
    sim_ttest_results[idx, "BayesFactor"] <- bf_ttest$BFmatrix[2, 1] # BF(H2,H1)
    sim_ttest_results[idx, "trueH"] <- j
    rm(bf_ttest, sim_fit_ttest, sim_ttest_df, sim_ttest_df_l)
  }
}
# Classify a vector of BFs as correct / incorrect / inconclusive, given the true
# hypothesis and a symmetric threshold thr:
# BF >= thr counts as evidence for H1, BF <= 1/thr as evidence for H0,
# anything in between is inconclusive.
classify_bf <- function(bf, true_h, thr) {
  case_when(
    bf >= thr & true_h == "H0 is true" ~ "incorrect",
    bf < thr & bf > (1 / thr) & true_h == "H0 is true" ~ "inconclusive",
    bf <= (1 / thr) & true_h == "H0 is true" ~ "correct",
    bf >= thr & true_h == "H1 is true" ~ "correct",
    bf < thr & bf > (1 / thr) & true_h == "H1 is true" ~ "inconclusive",
    bf <= (1 / thr) & true_h == "H1 is true" ~ "incorrect"
  )
}
# categorize the decision for the three candidate thresholds
sim_ttest_results <- sim_ttest_results %>%
  mutate(BF3  = classify_bf(BayesFactor, trueH, 3),
         BF5  = classify_bf(BayesFactor, trueH, 5),
         BF10 = classify_bf(BayesFactor, trueH, 10))
# Long format for plotting: one row per (simulation, threshold) pair
sim_ttest_results_l <- sim_ttest_results %>%
  pivot_longer(cols = 3:5, names_to = "BF Threshold", values_to = "decision")
# fix the display order of the threshold facets
sim_ttest_results_l$`BF Threshold` <- factor(sim_ttest_results_l$`BF Threshold`,
                                             levels = c("BF3", "BF5", "BF10"))
# hrbrthemes::import_roboto_condensed()
# Stacked-proportion bars of the decisions per true hypothesis,
# one facet per BF threshold; labels show rounded percentages.
ggplot(sim_ttest_results_l, aes(trueH, fill = decision)) +
geom_bar(position = "fill") +
# percentage labels, centred within each stack segment
geom_text(aes(label=round(..count../n_sim*100), y= ..count../n_sim),
position =position_stack(vjust = 0.5), stat= "count",
color = "white", size = 5) +
coord_flip() +
facet_wrap(~`BF Threshold`, ncol = 1) +
labs(title = "Results of the Bayes Factor Design Analysis",
subtitle = "For three different Bayes Factors",
caption = paste("In % (rounded), based on", n_sim, "simulations")) +
xlab("True Hypothesis") +
scale_fill_viridis_d() +
theme_ipsum_rc()
Hier zeigen sich bei einem \(N=510\) und einem angenommenen Cohen’s \(d=.35\) ebenfalls (nahezu) keine falsch-positiven Ergebnisse. Somit wird auch hier ein Threshold von \(BF=3\) bzw. \(BF=\frac{1}{3}\) verwendet.
Deskriptive Daten Einzelstunden
# pander formatting options for the descriptive tables
pander::panderOptions('digits', 10)
pander::panderOptions('round', 3)
pander::panderOptions('keep.trailing.zeros', TRUE)
## 45-minute lessons ################ #
# keep only lessons with a planned length of 45 minutes
schulart45 <- p_data %>%
  dplyr::filter(stunde == 45)
# descriptive statistics for these lessons, split by school type
descriptives45 <- schulart45 %>%
  dplyr::select(beg_hw_min, dau_hw_min) %>%
  psych::describeBy(group = schulart45$Schulart)
pander::pander(descriptives45)
Grundschule:
| vars | n | mean | sd | median | trimmed | mad | min | |
|---|---|---|---|---|---|---|---|---|
| beg_hw_min | 1 | 270 | 33.330 | 13.251 | 39 | 34.745 | 5.930 | 1 |
| dau_hw_min | 2 | 267 | 4.524 | 3.150 | 4 | 4.074 | 1.483 | 0 |
| max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|
| beg_hw_min | 83 | 82 | -0.728 | 0.366 | 0.806 |
| dau_hw_min | 25 | 25 | 2.133 | 7.826 | 0.193 |
Gymnasium:
| vars | n | mean | sd | median | trimmed | mad | min | |
|---|---|---|---|---|---|---|---|---|
| beg_hw_min | 1 | 143 | 38.839 | 8.347 | 42 | 40.504 | 2.965 | 1 |
| dau_hw_min | 2 | 142 | 2.937 | 2.216 | 2 | 2.667 | 1.483 | 0 |
| max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|
| beg_hw_min | 49 | 48 | -2.628 | 7.853 | 0.698 |
| dau_hw_min | 15 | 15 | 2.063 | 7.003 | 0.186 |
# Mode of the homework-assignment time: peak of the kernel density estimate.
# NOTE(review): the Gymnasium paste() call was missing in the transcript although
# its output (next line below) is present — reconstructed here.
dens_45gs <- schulart45 %>% # keep only measured cases
  dplyr::filter(!is.na(beg_hw_min) & Schulart == "Grundschule") # filter for Grundschule
dens_45gs <- density(dens_45gs$beg_hw_min) # get density curve
dens_45gy <- schulart45 %>% # keep only measured cases
  dplyr::filter(!is.na(beg_hw_min) & Schulart == "Gymnasium") # filter for Gymnasium
dens_45gy <- density(dens_45gy$beg_hw_min) # get density curve
paste("Modus der Hausaufgabenvergabe in 45min Stunden Grundschule =", round(dens_45gs$x[which.max(dens_45gs$y)], 1))
## [1] "Modus der Hausaufgabenvergabe in 45min Stunden Grundschule = 41.1"
paste("Modus der Hausaufgabenvergabe in 45min Stunden Gymnasium =", round(dens_45gy$x[which.max(dens_45gy$y)], 1))
## [1] "Modus der Hausaufgabenvergabe in 45min Stunden Gymnasium = 42.9"
Deskriptive Daten Doppelstunden
# pander formatting options for the descriptive tables
pander::panderOptions('digits', 10)
pander::panderOptions('round', 3)
pander::panderOptions('keep.trailing.zeros', TRUE)
## 90-minute lessons ################ #
# keep only lessons with a planned length of 90 minutes
# (the original comment said 45 minutes by mistake)
schulart90 <- p_data %>%
  dplyr::filter(stunde == 90)
# descriptive statistics for these lessons, split by school type
descriptives90 <- schulart90 %>%
  dplyr::select(beg_hw_min, dau_hw_min) %>%
  psych::describeBy(group = schulart90$Schulart)
pander::pander(descriptives90)
Grundschule:
| vars | n | mean | sd | median | trimmed | mad | min | |
|---|---|---|---|---|---|---|---|---|
| beg_hw_min | 1 | 26 | 69.346 | 24.095 | 80.0 | 73.545 | 7.413 | 5 |
| dau_hw_min | 2 | 26 | 8.269 | 7.411 | 5.5 | 7.091 | 3.706 | 1 |
| max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|
| beg_hw_min | 85 | 80 | -1.732 | 1.692 | 4.725 |
| dau_hw_min | 35 | 34 | 2.056 | 4.239 | 1.453 |
Gymnasium:
| vars | n | mean | sd | median | trimmed | mad | min | |
|---|---|---|---|---|---|---|---|---|
| beg_hw_min | 1 | 42 | 79.048 | 15.862 | 85 | 82.206 | 5.930 | 29 |
| dau_hw_min | 2 | 40 | 3.825 | 2.772 | 3 | 3.438 | 2.965 | 1 |
| max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|
| beg_hw_min | 95 | 66 | -1.723 | 2.020 | 2.448 |
| dau_hw_min | 13 | 12 | 1.169 | 1.263 | 0.438 |
# Mode of the homework-assignment time: peak of the kernel density estimate.
# NOTE(review): the Gymnasium paste() call was missing in the transcript although
# its output (next line below) is present — reconstructed here.
dens_90gs <- schulart90 %>% # keep only measured cases
  dplyr::filter(!is.na(beg_hw_min) & Schulart == "Grundschule") # filter for Grundschule
dens_90gs <- density(dens_90gs$beg_hw_min) # get density curve
dens_90gy <- schulart90 %>% # keep only measured cases
  dplyr::filter(!is.na(beg_hw_min) & Schulart == "Gymnasium") # filter for Gymnasium
dens_90gy <- density(dens_90gy$beg_hw_min) # get density curve
paste("Modus der Hausaufgabenvergabe in 90min Stunden Grundschule =", round(dens_90gs$x[which.max(dens_90gs$y)], 1))
## [1] "Modus der Hausaufgabenvergabe in 90min Stunden Grundschule = 81.8"
paste("Modus der Hausaufgabenvergabe in 90min Stunden Gymnasium =", round(dens_90gy$x[which.max(dens_90gy$y)], 1))
## [1] "Modus der Hausaufgabenvergabe in 90min Stunden Gymnasium = 87.2"
#### IMPUTATION #### #
# separate imputation for the 45-minute lessons
p_data_45 <- dplyr::filter(p_data, stunde == 45)
imp45 <- mice(p_data_45,
              maxit = maxit, # number of iterations
              m = m,         # number of imputations
              meth = meth,   # per-variable imputation methods
              pred = pred,   # predictor matrix (see above)
              seed = 666,
              printFlag = FALSE)
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: pool the group means and their variances by hand over all imputations ###
# collectors: one row per imputation
vcov_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
)
mean_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
)
# sample size per group
samp_gr <- table(mice::complete(imp45)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: mean beg_hw_min per school type (no intercept)
  fit_lm <- lm(beg_hw_min ~ Schulart - 1, data = mice::complete(imp45, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  # variance of the mean (VOM) per group
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"]   <- var_lm / samp_gr["Gymnasium"]
  # collect the estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"]   <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## pool: average the variances over the imputations
vcov_df <- vcov_df %>%
  summarize_all(mean)
# bain expects one 1x1 covariance matrix per group
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool: average the group means over the imputations
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# hypotheses: equality and both directed differences
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_45 <- bain(bf_data,
              hypothesis = hypotheses,
              n = samp_gr,          # size of the groups
              Sigma = variances,    # matrices of residual variances of groups
              group_parameters = 1, # one group-specific parameter (the mean in each group)
              joint_parameters = 0  # no parameters shared across the groups
)
print(bf_45)
## Bayesian informative hypothesis testing for an object of class numeric:
##
## Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
## H1 0.000 0.017 1.000 1.000 0.000 0.017 0.001 0.000 0.000
## H2 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
## H3 1.000 1.000 1.000 0.500 1.000 0.500 333236.770 1.000 0.667
## Hu 0.333
##
## Hypotheses:
## H1: Gym=Grund
## H2: Gym<Grund
## H3: Gym>Grund
##
## Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
## H1 H2 H3
## H1 1.000000e+00 115.0622 3.452867e-04
## H2 8.690949e-03 1.0000 3.000869e-06
## H3 2.896144e+03 333236.7735 1.000000e+00
###### DESCRIPTIVE PLOT ############################################### #
## Awesome Raincloud plots, check out source:
# Allen M, Poggiali D, Whitaker K et al. Raincloud plots: a multi-platform tool for
# robust data visualization [version 1; peer review: 2 approved].
# Wellcome Open Res 2019, 4:63. DOI: 10.12688/wellcomeopenres.15191.1
source("R_rainclouds.R")
# Raincloud plot of homework start times in 45-minute lessons, by school type;
# the dashed line marks the planned lesson end (45 min).
ggplot(p_data%>%dplyr::filter(stunde == 45), aes(x="", y = beg_hw_min, fill = Schulart, colour = Schulart)) +
geom_flat_violin(position = position_nudge(x = 0, y = 0), adjust = 1.6, trim = FALSE, alpha = .3) +
# boxplots nudged next to the violins, one offset per school type
geom_boxplot(aes(x=""), position = position_nudge(x = c(.49, .54), y = 0),
outlier.shape = NA, alpha = .5, width = .04, colour = "black") +
geom_hline(yintercept = 45, linetype = "dashed", colour = "#696f71", size = 1) +
scale_colour_brewer(palette = "Set1")+
scale_fill_brewer(palette = "Set1") +
scale_y_continuous(expand = c(0, 0), breaks = c(0, 10,20,30,40,45,50,60), limits = c(0, 65)) +
scale_x_discrete(expand = c(0, 0)) +
ylab("Minuten seit geplantem Stundenbeginn") +
xlab("% Häufigkeit") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) +
coord_flip()
Aufgrund fehlender Standards im Pooling von Bayes-Faktoren (BF) berechnen wir für jeden Datensatz einen Bayes-Faktor und berichten anschließend deren Verteilung.
# Compute the distribution of BFs over all m imputed data sets
kl_results <- data.frame()
for (i in seq_len(m)) {
  # correlation between homework start and grade level in imputation i
  fit <- correlationBF(y = mice::complete(imp45, i)$beg_hw_min,
                       x = mice::complete(imp45, i)$Klassenstufe)
  kl_results[i, "beg_hw_min-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
  rm(fit)
}
# show summary of BFs
summary(kl_results[,"beg_hw_min-Klassenstufe"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 439.2 1498.7 1934.4 2057.6 2419.9 6291.9
# plot BFs distribution: density of the per-imputation BFs on a log10 axis;
# the dashed red lines mark the decision thresholds 1/3 and 3
ggplot(kl_results, aes(x = `beg_hw_min-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
# individual BFs as jittered points below the density
geom_jitter(aes(x = `beg_hw_min-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))
#### IMPUTATION #### #
# separate imputation for the 90-minute lessons
p_data_90 <- dplyr::filter(p_data, stunde == 90)
imp90 <- mice(p_data_90,
              maxit = maxit, # number of iterations
              m = m,         # number of imputations
              meth = meth,   # per-variable imputation methods
              pred = pred,   # predictor matrix (see above)
              seed = 666,
              printFlag = FALSE)
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: pool the group means and their variances by hand over all imputations ###
# collectors: one row per imputation
vcov_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
)
mean_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
)
# sample size per group
samp_gr <- table(mice::complete(imp90)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: mean beg_hw_min per school type (no intercept)
  fit_lm <- lm(beg_hw_min ~ Schulart - 1, data = mice::complete(imp90, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  # variance of the mean (VOM) per group
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"]   <- var_lm / samp_gr["Gymnasium"]
  # collect the estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"]   <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## pool: average the variances over the imputations
vcov_df <- vcov_df %>%
  summarize_all(mean)
# bain expects one 1x1 covariance matrix per group
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool: average the group means over the imputations
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# hypotheses: equality and both directed differences
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_90 <- bain(bf_data,
              hypothesis = hypotheses,
              n = samp_gr,          # size of the groups
              Sigma = variances,    # matrices of residual variances of groups
              group_parameters = 1, # one group-specific parameter (the mean in each group)
              joint_parameters = 0  # no parameters shared across the groups
)
print(bf_90)
## Bayesian informative hypothesis testing for an object of class numeric:
##
## Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
## H1 0.011 0.010 1.000 1.000 0.011 0.010 1.075 0.350 0.264
## H2 1.000 1.000 0.023 0.500 0.023 0.500 0.023 0.015 0.011
## H3 1.000 1.000 0.977 0.500 0.977 0.500 43.415 0.636 0.480
## Hu 0.245
##
## Hypotheses:
## H1: Gym=Grund
## H2: Gym<Grund
## H3: Gym>Grund
##
## Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
## H1 H2 H3
## H1 1.00000000 23.87773 0.54998494
## H2 0.04188002 1.00000 0.02303338
## H3 1.81823161 43.41525 1.00000000
###### DESCRIPTIVE PLOT ############################################### #
# Raincloud plot of homework start times in 90-minute lessons, by school type;
# the dashed line marks the planned lesson end (90 min).
ggplot(p_data%>%dplyr::filter(stunde == 90),
aes(x="", y = beg_hw_min, fill = Schulart, colour = Schulart)) +
geom_flat_violin(position = position_nudge(x = 0, y = 0),
adjust = 1.6, trim = FALSE, alpha = .3) +
# boxplots nudged next to the violins, one offset per school type
geom_boxplot(aes(x=""), position = position_nudge(x = c(.50, .56), y = 0),
outlier.shape = NA, alpha = .5, width = .05, colour = "black") +
geom_hline(yintercept = 90, linetype = "dashed", colour = "#696f71", size = 1) +
scale_colour_brewer(palette = "Set1")+
scale_fill_brewer(palette = "Set1") +
scale_y_continuous(expand = c(0, 0),
breaks = c(0, 10,20,30,40,50,60,70,80,90),
limits = c(0,95)) +
scale_x_discrete(expand = c(0, 0)) +
ylab("Minuten seit geplantem Stundenbeginn") +
xlab("% Häufigkeit") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) +
coord_flip()
Aufgrund fehlender Standards im Pooling von Bayes-Faktoren (BF) berechnen wir für jeden Datensatz einen Bayes-Faktor und berichten anschließend deren Verteilung.
# Compute the distribution of BFs over all m imputed data sets
kl_results <- data.frame()
for (i in seq_len(m)) {
  # correlation between homework start and grade level in imputation i
  fit <- correlationBF(y = mice::complete(imp90, i)$beg_hw_min,
                       x = mice::complete(imp90, i)$Klassenstufe)
  kl_results[i, "beg_hw_min-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
  rm(fit)
}
# show summary of BFs
summary(kl_results[,"beg_hw_min-Klassenstufe"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.937 6.937 6.937 6.937 6.937 6.937
# plot BFs distribution: density of the per-imputation BFs on a log10 axis;
# the dashed red lines mark the decision thresholds 1/3 and 3
ggplot(kl_results, aes(x = `beg_hw_min-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
# individual BFs as jittered points below the density
geom_jitter(aes(x = `beg_hw_min-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))
#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: pool the group means and their variances by hand over all imputations ###
# collectors: one row per imputation
vcov_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
)
mean_df <- data.frame(Grundschule = as.numeric(),
                      Gymnasium = as.numeric()
)
# sample size per group
samp_gr <- table(mice::complete(imp45)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model: mean homework duration per school type (no intercept)
  fit_lm <- lm(dau_hw_min ~ Schulart - 1, data = mice::complete(imp45, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance
  # variance of the mean (VOM) per group
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"]   <- var_lm / samp_gr["Gymnasium"]
  # collect the estimated group means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"]   <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## pool: average the variances over the imputations
vcov_df <- vcov_df %>%
  summarize_all(mean)
# bain expects one 1x1 covariance matrix per group
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## pool: average the group means over the imputations
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# hypotheses: equality and both directed differences
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_45 <- bain(bf_data,
              hypothesis = hypotheses,
              n = samp_gr,          # size of the groups
              Sigma = variances,    # matrices of residual variances of groups
              group_parameters = 1, # one group-specific parameter (the mean in each group)
              joint_parameters = 0  # no parameters shared across the groups
)
print(bf_45)## Bayesian informative hypothesis testing for an object of class numeric:
##
## Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
## H1 0.000 0.069 1.000 1.000 0.000 0.069 0.000 0.000 0.000
## H2 1.000 1.000 1.000 0.500 1.000 0.500 19180728.204 1.000 0.667
## H3 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
## Hu 0.333
##
## Hypotheses:
## H1: Gym=Grund
## H2: Gym<Grund
## H3: Gym>Grund
##
## Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
## H1 H2 H3
## H1 1.000000e+00 6.970502e-06 1.336994e+02
## H2 1.434617e+05 1.000000e+00 1.918075e+07
## H3 7.479464e-03 5.213562e-08 1.000000e+00
###### DESCRIPTIVE PLOT ############################################### #
# Raincloud-style descriptive plot: homework duration (minutes) by school
# type for 45-minute lessons (half violin + jittered raw points + boxplot).
# NOTE(review): geom_flat_violin() is not exported by any package loaded in
# the setup chunk — presumably sourced/defined elsewhere in the project; verify.
ggplot(p_data%>%dplyr::filter(stunde == 45), aes(x=Schulart, y = dau_hw_min, fill = Schulart)) +
geom_flat_violin(position = position_nudge(x = .1, y = 0),
adjust = 1.6, trim = FALSE, alpha = .3, colour = NA) +
geom_point(position = position_jitter(width = .05), size = 1, alpha = 0.5, aes(color=Schulart)) +
geom_boxplot(position = position_nudge(x = -.15, y = 0),
outlier.shape = NA, alpha = .5, width = .1, colour = "black") +
scale_colour_brewer(palette = "Set1")+
scale_y_continuous(expand = c(0, 0)) +
scale_fill_brewer(palette = "Set1") +
ylab("Minuten") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one correlation BF per completed data set, since
# there is no accepted standard for pooling BFs across imputations.
kl_results <- data.frame()
for (i in seq_len(m)) { # seq_len() is safe for m == 0, unlike 1:m
  comp_i <- mice::complete(imp45, i) # complete once per iteration instead of twice
  fit <- correlationBF(y = comp_i$dau_hw_min, # correlation
                       x = comp_i$Klassenstufe)
  kl_results[i, "dau_hw_min-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
  rm(fit, comp_i)
}
# show summary of BFs
summary(kl_results[,"dau_hw_min-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 153766 2977665 4352611 5419865 6829702 28537391
# Distribution of the per-imputation Bayes factors on a log10 axis; the red
# dashed guides mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `dau_hw_min-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(
    aes(x = `dau_hw_min-Klassenstufe`, y = ""),
    width = 0, height = 1,
    size = 2.5, alpha = .3,
    fill = "#b4a069", color = "#b4a069"
  ) +
  geom_vline(xintercept = (1/3), linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  geom_vline(xintercept = 3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  ylab("% Häufigkeit") +
  xlab("BF [log10]") +
  theme_light() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_rect(fill = "#f6f7f7"),
    axis.line = element_line(colour = "#696f71")
  )#### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute vcov by hand ###
# Collect, per imputation, the variance of the group means (VOM) and the group
# mean estimates; both are averaged over the m imputations further below.
# Preallocated to m rows instead of growing the data frames inside the loop.
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
# sample size per group (taken from the first completed data set; assumes
# Schulart is fully observed so group sizes match across imputations -- TODO confirm)
samp_gr <- table(mice::complete(imp90)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) { # seq_len() is safe for m == 0, unlike 1:m
  # cell-means model (intercept suppressed), so coefficients are the group means
  fit_lm <- lm(dau_hw_min ~ Schulart - 1, data = mice::complete(imp90, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance ("^" is the idiomatic form of "**")
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"] # compute VOM per group
  vcov_df[i, "Gymnasium"] <- var_lm / samp_gr["Gymnasium"] # compute VOM per group
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# compute the mean over var
vcov_df <- vcov_df %>%
  summarize_all(mean)
# one 1x1 covariance matrix per group, the format bain expects for Sigma
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## compute mean of estimates (pooled group means)
bf_data <- mean_df %>%
  summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym") # short labels referenced in the hypotheses below
## BAIN ##### #
# generating hypotheses: equal, smaller, larger
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_90 <- bain(bf_data,
              hypothesis = hypotheses,
              n = samp_gr, # size of the groups
              Sigma = variances, # matrices of residual variances of groups
              group_parameters = 1, # there is 1 group specific parameter (the mean in each group)
              joint_parameters = 0 # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
)
print(bf_90)## Bayesian informative hypothesis testing for an object of class numeric:
##
## Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
## H1 0.001 0.039 1.000 1.000 0.001 0.039 0.025 0.012 0.008
## H2 1.000 1.000 1.000 0.500 1.000 0.500 2896.832 0.987 0.661
## H3 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
## Hu 0.331
##
## Hypotheses:
## H1: Gym=Grund
## H2: Gym<Grund
## H3: Gym>Grund
##
## Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
## H1 H2 H3
## H1 1.0000000 0.0126602106 36.67451
## H2 78.9876273 1.0000000000 2896.83229
## H3 0.0272669 0.0003452047 1.00000
###### DESCRIPTIVE PLOT ############################################### #
# Raincloud-style descriptive plot: homework duration (minutes) by school
# type for 90-minute lessons (half violin + jittered raw points + boxplot).
# NOTE(review): geom_flat_violin() is not exported by any package loaded in
# the setup chunk — presumably sourced/defined elsewhere in the project; verify.
ggplot(p_data%>%dplyr::filter(stunde == 90), aes(x=Schulart, y = dau_hw_min, fill = Schulart)) +
geom_flat_violin(position = position_nudge(x = .1, y = 0),
adjust = 1.6, trim = FALSE, alpha = .3, colour = NA) +
geom_point(position = position_jitter(width = .05), size = 1, alpha = 0.5, aes(color=Schulart)) +
geom_boxplot(position = position_nudge(x = -.15, y = 0),
outlier.shape = NA, alpha = .5, width = .1, colour = "black") +
scale_colour_brewer(palette = "Set1")+
scale_y_continuous(expand = c(0, 0)) +
scale_fill_brewer(palette = "Set1") +
ylab("Minuten") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one correlation BF per completed data set, since
# there is no accepted standard for pooling BFs across imputations.
kl_results <- data.frame()
for (i in seq_len(m)) { # seq_len() is safe for m == 0, unlike 1:m
  comp_i <- mice::complete(imp90, i) # complete once per iteration instead of twice
  fit <- correlationBF(y = comp_i$dau_hw_min, # correlation
                       x = comp_i$Klassenstufe)
  kl_results[i, "dau_hw_min-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
  rm(fit, comp_i)
}
# show summary of BFs
summary(kl_results[,"dau_hw_min-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9624 14.4547 23.2238 21.3723 26.8566 40.5913
# plot BFs distribution (log10 x-axis; red dashed guides at BF = 1/3 and BF = 3)
ggplot(kl_results, aes(x = `dau_hw_min-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `dau_hw_min-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))descriptives <- p_data %>%
# Descriptive statistics (psych::describeBy) for the coded lesson features,
# split by school type; rendered as markdown tables via pander below.
dplyr::select(lh_ank, lh_auf, lh_nen, lh_sch,
lh_erl, lh_wfr, lh_wno, lh_ano,
lh_bfr, sh_auf, sh_mel, sh_fra, sh_not) %>%
psych::describeBy(group = p_data$Schulart)
panderOptions('digits', 3)
pander::pander(descriptives)Grundschule:
| vars | n | mean | sd | median | trimmed | mad | min | |
|---|---|---|---|---|---|---|---|---|
| lh_ank | 1 | 322 | 0.925 | 0.263 | 1 | 1.000 | 0.00 | 0 |
| lh_auf | 2 | 320 | 0.809 | 0.393 | 1 | 0.887 | 0.00 | 0 |
| lh_nen | 3 | 319 | 0.947 | 0.225 | 1 | 1.000 | 0.00 | 0 |
| lh_sch | 4 | 319 | 0.715 | 0.452 | 1 | 0.767 | 0.00 | 0 |
| lh_erl | 5 | 320 | 0.684 | 0.465 | 1 | 0.730 | 0.00 | 0 |
| lh_wfr | 6 | 320 | 0.241 | 0.428 | 0 | 0.176 | 0.00 | 0 |
| lh_wno | 7 | 322 | 0.696 | 0.461 | 1 | 0.744 | 0.00 | 0 |
| lh_ano | 8 | 321 | 0.586 | 0.493 | 1 | 0.607 | 0.00 | 0 |
| lh_bfr | 9 | 320 | 0.781 | 1.499 | 0 | 0.480 | 0.00 | 0 |
| sh_auf | 10 | 320 | 4.169 | 0.968 | 4 | 4.316 | 1.48 | 1 |
| sh_mel | 11 | 321 | 0.611 | 1.207 | 0 | 0.319 | 0.00 | 0 |
| sh_fra | 12 | 320 | 0.844 | 1.648 | 0 | 0.492 | 0.00 | 0 |
| sh_not | 13 | 321 | 71.240 | 38.780 | 90 | 76.529 | 14.83 | 0 |
| max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|
| lh_ank | 1 | 1 | -3.225 | 8.426 | 0.015 |
| lh_auf | 1 | 1 | -1.568 | 0.460 | 0.022 |
| lh_nen | 1 | 1 | -3.959 | 13.716 | 0.013 |
| lh_sch | 1 | 1 | -0.947 | -1.107 | 0.025 |
| lh_erl | 1 | 1 | -0.790 | -1.381 | 0.026 |
| lh_wfr | 1 | 1 | 1.208 | -0.543 | 0.024 |
| lh_wno | 1 | 1 | -0.846 | -1.287 | 0.026 |
| lh_ano | 1 | 1 | -0.346 | -1.886 | 0.028 |
| lh_bfr | 15 | 15 | 4.395 | 31.258 | 0.084 |
| sh_auf | 5 | 4 | -1.230 | 1.291 | 0.054 |
| sh_mel | 9 | 9 | 2.668 | 9.296 | 0.067 |
| sh_fra | 15 | 15 | 3.862 | 22.788 | 0.092 |
| sh_not | 100 | 100 | -1.055 | -0.590 | 2.164 |
Gymnasium:
| vars | n | mean | sd | median | trimmed | mad | min | |
|---|---|---|---|---|---|---|---|---|
| lh_ank | 1 | 183 | 0.770 | 0.422 | 1 | 0.837 | 0.00 | 0 |
| lh_auf | 2 | 181 | 0.591 | 0.493 | 1 | 0.614 | 0.00 | 0 |
| lh_nen | 3 | 123 | 0.967 | 0.178 | 1 | 1.000 | 0.00 | 0 |
| lh_sch | 4 | 183 | 0.628 | 0.485 | 1 | 0.660 | 0.00 | 0 |
| lh_erl | 5 | 183 | 0.650 | 0.478 | 1 | 0.687 | 0.00 | 0 |
| lh_wfr | 6 | 184 | 0.207 | 0.406 | 0 | 0.135 | 0.00 | 0 |
| lh_wno | 7 | 184 | 0.321 | 0.468 | 0 | 0.277 | 0.00 | 0 |
| lh_ano | 8 | 179 | 0.196 | 0.398 | 0 | 0.124 | 0.00 | 0 |
| lh_bfr | 9 | 121 | 0.661 | 1.029 | 0 | 0.443 | 0.00 | 0 |
| sh_auf | 10 | 108 | 3.815 | 1.034 | 4 | 3.898 | 1.48 | 1 |
| sh_mel | 11 | 123 | 0.602 | 0.981 | 0 | 0.394 | 0.00 | 0 |
| sh_fra | 12 | 183 | 0.705 | 1.218 | 0 | 0.429 | 0.00 | 0 |
| sh_not | 13 | 171 | 59.865 | 37.651 | 70 | 62.314 | 44.48 | 0 |
| max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|
| lh_ank | 1 | 1 | -1.276 | -0.374 | 0.031 |
| lh_auf | 1 | 1 | -0.368 | -1.875 | 0.037 |
| lh_nen | 1 | 1 | -5.207 | 25.317 | 0.016 |
| lh_sch | 1 | 1 | -0.527 | -1.731 | 0.036 |
| lh_erl | 1 | 1 | -0.625 | -1.618 | 0.035 |
| lh_wfr | 1 | 1 | 1.438 | 0.069 | 0.030 |
| lh_wno | 1 | 1 | 0.762 | -1.427 | 0.035 |
| lh_ano | 1 | 1 | 1.523 | 0.320 | 0.030 |
| lh_bfr | 6 | 6 | 2.433 | 7.657 | 0.094 |
| sh_auf | 5 | 4 | -0.435 | -0.793 | 0.099 |
| sh_mel | 6 | 6 | 2.350 | 7.428 | 0.088 |
| sh_fra | 7 | 7 | 2.569 | 7.707 | 0.090 |
| sh_not | 100 | 100 | -0.443 | -1.345 | 2.879 |
Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one contingency-table BF per completed data set,
# since there is no accepted standard for pooling BFs across imputations.
cor_results <- data.frame()
for (i in seq_len(m)) { # seq_len() is safe for m == 0, unlike 1:m
  comp_i <- mice::complete(imp, i) # complete once per iteration instead of twice
  imp_m <- matrix(table(comp_i$lh_ank, # create 2x2 contingency table
                        comp_i$Schulart), 2, 2)
  fit <- contingencyTableBF(imp_m, sampleType="indepMulti",
                            fixedMargin = "cols") # GS and GY as fixed
  cor_results[i,"lh_ank-Schulart"] <- extractBF(fit)$bf # save BF in data frame
  # namespace-qualified, because a later chunk binds a numeric to the name `phi`
  cor_results[i,"phi"] <- psych::phi(imp_m, digits = 3) # get phi coefficient for effect size
  rm(imp_m, fit, comp_i)
}
# show summary of BFs
summary(cor_results[,"lh_ank-Schulart"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1730 9621 9621 9294 9621 38702
# Distribution of the per-imputation Bayes factors on a log10 axis; the red
# dashed guides mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_ank-Schulart`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(
    aes(x = `lh_ank-Schulart`, y = ""),
    width = 0, height = 1,
    size = 2.5, alpha = .3,
    fill = "#b4a069", color = "#b4a069"
  ) +
  geom_vline(xintercept = (1/3), linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  geom_vline(xintercept = 3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  ylab("% Häufigkeit") +
  xlab("BF [log10]") +
  theme_light() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_rect(fill = "#f6f7f7"),
    axis.line = element_line(colour = "#696f71")
  )### effect size: phi #########
# Pool the per-imputation phi effect sizes:
# In order to combine correlation estimates via Rubin's Rules we first
# have to apply Fisher-Transformation for Correlation to z-Score as
# correlations are not normally distributed and then transform the
# values back to correlation coefficients.
# apply Fisher transformation (typo fixed: "Fischer" -> "Fisher")
cor_results_phi_z <- FisherZ(cor_results$phi)
# combine estimates
cor_results_phi_z <- mean(cor_results_phi_z)
# transform back to correlation coefficients
# NOTE(review): this binds a numeric to the name `phi`; later chunks that CALL
# phi(...) still resolve to psych::phi, but non-call uses of `phi` would not.
phi <- FisherZInv(cor_results_phi_z)Effektstärke: \(\varphi\)= -0.218
Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one correlation BF per completed data set, since
# there is no accepted standard for pooling BFs across imputations.
kl_results <- data.frame()
for (i in seq_len(m)) { # seq_len() is safe for m == 0, unlike 1:m
  comp_i <- mice::complete(imp, i) # complete once per iteration instead of twice
  fit <- correlationBF(y = comp_i$lh_ank, # correlation
                       x = comp_i$Klassenstufe)
  kl_results[i,"lh_ank-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
  rm(fit, comp_i)
}
# show summary of BFs
summary(kl_results[,"lh_ank-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2093 18554 19939 17798 20601 27261
# Distribution of the per-imputation Bayes factors on a log10 axis; the red
# dashed guides mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(kl_results, aes(x = `lh_ank-Klassenstufe`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(
    aes(x = `lh_ank-Klassenstufe`, y = ""),
    width = 0, height = 1,
    size = 2.5, alpha = .3,
    fill = "#b4a069", color = "#b4a069"
  ) +
  geom_vline(xintercept = (1/3), linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  geom_vline(xintercept = 3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  ylab("% Häufigkeit") +
  xlab("BF [log10]") +
  theme_light() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_rect(fill = "#f6f7f7"),
    axis.line = element_line(colour = "#696f71")
  )###### DESCRIPTIVE PLOT ############################################### #
# plot (non-missing) values as descriptive
# Per-Klassenstufe share of lessons with lh_ank == 1 (point size = number of
# observed lessons); pink/blue rectangles shade primary vs. secondary grades.
lh_ank_p <- p_data %>%
dplyr::filter(!is.na(lh_ank) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
mutate(length = length(lh_ank)) %>%
summarize(lh_ank = mean(lh_ank, na.rm=T),
length_n = mean(length)) %>%
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
Schulart = as.factor(case_when(
Klassenstufe == "1" ~ "Grundschule",
Klassenstufe == "1_2" ~ "Grundschule",
Klassenstufe == "2" ~ "Grundschule",
Klassenstufe == "3" ~ "Grundschule",
Klassenstufe == "4" ~ "Grundschule",
TRUE ~ "Gymnasium"
)))
ggplot(lh_ank_p, aes(x = Klassenstufe, y = lh_ank, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = paste(round(lh_ank*100,0), "%"), vjust = 3)) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs (columns are appended to the existing cor_results,
# which is deliberately NOT reset here)
for (i in seq_len(m)) { # seq_len() is safe for m == 0, unlike 1:m
  comp_i <- mice::complete(imp, i) # complete once per iteration instead of twice
  imp_m <- matrix(table(comp_i$lh_auf, comp_i$Schulart), 2, 2) # 2x2 contingency table
  fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
  cor_results[i,"lh_auf-Schulart"] <- extractBF(fit)$bf
  # namespace-qualified, because a later chunk binds a numeric to the name `phi`
  cor_results[i,"phi"] <- psych::phi(imp_m, digits = 3) # get phi coefficient for effect size
  rm(imp_m, fit, comp_i)
}
summary(cor_results[,"lh_auf-Schulart"])## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11132 71353 85423 131640 159914 774708
# Distribution of the per-imputation Bayes factors on a log10 axis; the red
# dashed guides mark the conventional evidence bounds BF = 1/3 and BF = 3.
ggplot(cor_results, aes(x = `lh_auf-Schulart`)) +
  geom_density(fill = "#b4a069", color = NA, alpha = .3) +
  geom_jitter(aes(x = `lh_auf-Schulart`, y = ""),
              width = 0, height = 1, size = 2.5, alpha = .3,
              fill = "#b4a069", color = "#b4a069") +
  geom_vline(xintercept = (1/3), linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  geom_vline(xintercept = 3, linetype = "dashed",
             color = "red", alpha = .4, size = 2) +
  scale_x_continuous(trans = "log10", expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  ylab("% Häufigkeit") +
  xlab("BF [log10]") +
  theme_light() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_rect(fill = "#f6f7f7"),
    axis.line = element_line(colour = "#696f71")
  )### effect size: phi #########
# Pool the per-imputation phi effect sizes:
# In order to combine correlation estimates via Rubin's Rules we first
# have to apply Fisher-Transformation for Correlation to z-Score as
# correlations are not normally distributed and then transform the
# values back to correlation coefficients.
# apply Fisher transformation (typo fixed: "Fischer" -> "Fisher")
cor_results_phi_z <- FisherZ(cor_results$phi)
# combine estimates
cor_results_phi_z <- mean(cor_results_phi_z)
# transform back to correlation coefficients
# NOTE(review): this binds a numeric to the name `phi`; later chunks that CALL
# phi(...) still resolve to psych::phi, but non-call uses of `phi` would not.
phi <- FisherZInv(cor_results_phi_z)Effektstärke: \(\varphi\)= -0.236
Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one correlation BF (lh_auf ~ Klassenstufe) per
# completed data set; the distribution is reported instead of a pooled BF.
kl_results <- data.frame()
for (i in 1:m) {
fit <- correlationBF(y = mice::complete(imp, i)$lh_auf, # correlation
x = mice::complete(imp, i)$Klassenstufe)
kl_results[i,"lh_auf-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(fit)
}
# show summary of BFs
summary(kl_results[,"lh_auf-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 88811 596357 1085965 1381383 1685379 6867900
# plot BFs distribution (log10 x-axis; red dashed guides at BF = 1/3 and BF = 3)
ggplot(kl_results, aes(x = `lh_auf-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_auf-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-Klassenstufe share of lessons with lh_auf == 1 (point size = number of
# observed lessons); pink/blue rectangles shade primary vs. secondary grades.
lh_auf_p <- p_data %>%
dplyr::filter(!is.na(lh_auf) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
mutate(length = length(lh_auf)) %>%
summarize(lh_auf = mean(lh_auf, na.rm=T),
length_n = mean(length)) %>%
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
Schulart = as.factor(case_when(
Klassenstufe == "1" ~ "Grundschule",
Klassenstufe == "1_2" ~ "Grundschule",
Klassenstufe == "2" ~ "Grundschule",
Klassenstufe == "3" ~ "Grundschule",
Klassenstufe == "4" ~ "Grundschule",
TRUE ~ "Gymnasium"
)))
ggplot(lh_auf_p, aes(x = Klassenstufe, y = lh_auf, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = paste(round(lh_auf*100,0), "%"), vjust = 3)) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one contingency-table BF (lh_nen x Schulart) per
# completed data set; columns are appended to the existing cor_results.
for (i in 1:m) {
imp_m <- matrix(table(mice::complete(imp, i)$lh_nen, mice::complete(imp, i)$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_nen-Schulart"] <- extractBF(fit)$bf
cor_results[i,"phi"] <- phi(imp_m, digits = 3) # get phi coefficient for effect size
rm(imp_m, fit)
}
summary(cor_results[,"lh_nen-Schulart"])## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.05133 0.05412 0.06194 0.08576 0.07829 3.59638
# plot BFs distribution (log10 x-axis; red dashed guides at BF = 1/3 and BF = 3)
ggplot(cor_results, aes(x = `lh_nen-Schulart`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_nen-Schulart`, y=""), height = 1, width = 0, alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4, size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4, size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))### effect size: phi #########
# Pool the per-imputation phi effect sizes:
# In order to combine correlation estimates via Rubin's Rules we first
# have to apply Fisher-Transformation for Correlation to z-Score as
# correlations are not normally distributed and then transform the
# values back to correlation coefficients.
# apply Fisher transformation (typo fixed: "Fischer" -> "Fisher")
cor_results_phi_z <- FisherZ(cor_results$phi)
# combine estimates
cor_results_phi_z <- mean(cor_results_phi_z)
# transform back to correlation coefficients
phi <- FisherZInv(cor_results_phi_z)Effektstärke: \(\varphi\)= 0.009
Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one correlation BF (lh_nen ~ Klassenstufe) per
# completed data set; the distribution is reported instead of a pooled BF.
kl_results <- data.frame()
for (i in 1:m) {
fit <- correlationBF(y = mice::complete(imp, i)$lh_nen, # correlation
x = mice::complete(imp, i)$Klassenstufe)
kl_results[i,"lh_nen-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(fit)
}
# show summary of BFs
summary(kl_results[,"lh_nen-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1036 0.1111 0.1391 0.8039 0.2199 504.1839
# plot BFs distribution (log10 x-axis; red dashed guides at BF = 1/3 and BF = 3)
ggplot(kl_results, aes(x = `lh_nen-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_nen-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-Klassenstufe share of lessons with lh_nen == 1 (point size = number of
# observed lessons); pink/blue rectangles shade primary vs. secondary grades.
lh_nen_p <- p_data %>%
dplyr::filter(!is.na(lh_nen) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
mutate(length = length(lh_nen)) %>%
summarize(lh_nen = mean(lh_nen, na.rm=T),
length_n = mean(length)) %>%
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
Schulart = as.factor(case_when(
Klassenstufe == "1" ~ "Grundschule",
Klassenstufe == "1_2" ~ "Grundschule",
Klassenstufe == "2" ~ "Grundschule",
Klassenstufe == "3" ~ "Grundschule",
Klassenstufe == "4" ~ "Grundschule",
TRUE ~ "Gymnasium"
)))
ggplot(lh_nen_p, aes(x = Klassenstufe, y = lh_nen, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = paste(round(lh_nen*100,0), "%"), vjust = 3)) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one contingency-table BF (lh_sch x Schulart) per
# completed data set; columns are appended to the existing cor_results.
for (i in 1:m) {
imp_m <- matrix(table(mice::complete(imp, i)$lh_sch, mice::complete(imp, i)$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_sch-Schulart"] <- extractBF(fit)$bf
cor_results[i,"phi"] <- phi(imp_m, digits = 3) # get phi coefficient for effect size
rm(imp_m, fit)
}
summary(cor_results[,"lh_sch-Schulart"])## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4367 0.6171 0.7092 0.7208 0.8198 1.4633
# plot BFs distribution (log10 x-axis; red dashed guides at BF = 1/3 and BF = 3)
ggplot(cor_results, aes(x = `lh_sch-Schulart`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_sch-Schulart`, y=""), height = 1, width = 0, alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4, size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4, size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))### effect size: phi #########
# Pool the per-imputation phi effect sizes:
# In order to combine correlation estimates via Rubin's Rules we first
# have to apply Fisher-Transformation for Correlation to z-Score as
# correlations are not normally distributed and then transform the
# values back to correlation coefficients.
# apply Fisher transformation (typo fixed: "Fischer" -> "Fisher")
cor_results_phi_z <- FisherZ(cor_results$phi)
# combine estimates
cor_results_phi_z <- mean(cor_results_phi_z)
# transform back to correlation coefficients
phi <- FisherZInv(cor_results_phi_z)Effektstärke: \(\varphi\)= -0.086
Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one correlation BF (lh_sch ~ Klassenstufe) per
# completed data set; the distribution is reported instead of a pooled BF.
kl_results <- data.frame()
for (i in 1:m) {
fit <- correlationBF(y = mice::complete(imp, i)$lh_sch, # correlation
x = mice::complete(imp, i)$Klassenstufe)
kl_results[i,"lh_sch-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(fit)
}
# show summary of BFs
summary(kl_results[,"lh_sch-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1986 0.2669 0.2969 0.3188 0.3514 0.6692
# plot BFs distribution (log10 x-axis; red dashed guides at BF = 1/3 and BF = 3)
ggplot(kl_results, aes(x = `lh_sch-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_sch-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-Klassenstufe share of lessons with lh_sch == 1 (point size = number of
# observed lessons); pink/blue rectangles shade primary vs. secondary grades.
lh_sch_p <- p_data %>%
dplyr::filter(!is.na(lh_sch) & !is.na(Klassenstufe)) %>%
group_by(Klassenstufe) %>%
mutate(length = length(lh_sch)) %>%
summarize(lh_sch = mean(lh_sch, na.rm=T),
length_n = mean(length)) %>%
ungroup() %>%
mutate(Klassenstufe = factor(Klassenstufe, levels = c("1", "1_2", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12")),
Schulart = as.factor(case_when(
Klassenstufe == "1" ~ "Grundschule",
Klassenstufe == "1_2" ~ "Grundschule",
Klassenstufe == "2" ~ "Grundschule",
Klassenstufe == "3" ~ "Grundschule",
Klassenstufe == "4" ~ "Grundschule",
TRUE ~ "Gymnasium"
)))
ggplot(lh_sch_p, aes(x = Klassenstufe, y = lh_sch, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = paste(round(lh_sch*100,0), "%"), vjust = 3)) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs: one contingency-table BF (lh_erl x Schulart) per
# completed data set; columns are appended to the existing cor_results.
for (i in 1:m) {
imp_m <- matrix(table(mice::complete(imp, i)$lh_erl, mice::complete(imp, i)$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_erl-Schulart"] <- extractBF(fit)$bf
cor_results[i,"phi"] <- phi(imp_m, digits = 3) # get phi coefficient for effect size
rm(imp_m, fit)
}
summary(cor_results[,"lh_erl-Schulart"])## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1248 0.1411 0.1488 0.1512 0.1578 0.1906
# plot BFs distribution (log10 x-axis; red dashed guides at BF = 1/3 and BF = 3)
ggplot(cor_results, aes(x = `lh_erl-Schulart`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_erl-Schulart`, y=""), height = 1, width = 0, alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4, size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4, size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))### effect size: phi #########
# Pool the per-imputation phi effect sizes:
# In order to combine correlation estimates via Rubin's Rules we first
# have to apply Fisher-Transformation for Correlation to z-Score as
# correlations are not normally distributed and then transform the
# values back to correlation coefficients.
# apply Fisher transformation (typo fixed: "Fischer" -> "Fisher")
cor_results_phi_z <- FisherZ(cor_results$phi)
# combine estimates
cor_results_phi_z <- mean(cor_results_phi_z)
# transform back to correlation coefficients
phi <- FisherZInv(cor_results_phi_z)Effektstärke: \(\varphi\)= -0.036
Aufgrund fehlender Standards beim Pooling von Bayes-Faktoren (BF) berechnen wir für jeden Datensatz einen Bayes-Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One correlation BF (lh_erl vs. Klassenstufe) per imputed dataset.
# NOTE(review): correlationBF() requires numeric input — assumes
# Klassenstufe is numeric in the imputed data; confirm.
kl_results <- data.frame()
for (i in 1:m) {
fit <- correlationBF(y = mice::complete(imp, i)$lh_erl, # correlation
x = mice::complete(imp, i)$Klassenstufe)
kl_results[i,"lh_erl-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(fit)
}
# show summary of BFs
summary(kl_results[,"lh_erl-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1253 0.1460 0.1560 0.1595 0.1697 0.2328
# plot BFs distribution
# Distribution of the m BFs on a log10 axis; dashed lines mark 1/3 and 3.
ggplot(kl_results, aes(x = `lh_erl-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_erl-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-grade summary of lh_erl: group mean plus group size (drives point
# size in the plot that follows).
# `n()` replaces the original mutate(length = length(lh_erl)) /
# mean(length) detour, and `%in%` collapses five identical case_when()
# arms; the resulting data frame is unchanged.
lh_erl_p <- p_data %>%
  dplyr::filter(!is.na(lh_erl) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_erl = mean(lh_erl, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Descriptive plot: per-grade mean of lh_erl; point size = number of lessons,
# shaded rectangles split grades 1-4 (Grundschule) from 5-12 (Gymnasium).
# NOTE(review): constant `vjust` inside aes() and row-wise geom_rect()
# (alpha accumulates over rows) kept as-is to preserve the rendered figure.
ggplot(lh_erl_p, aes(x = Klassenstufe, y = lh_erl, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = paste(round(lh_erl*100,0), "%"), vjust = 3)) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One contingency-table BF (lh_wfr x Schulart) per imputed dataset; `m`,
# `imp` and `cor_results` are created earlier in the document.
for (i in 1:m) {
imp_m <- matrix(table(mice::complete(imp, i)$lh_wfr, mice::complete(imp, i)$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_wfr-Schulart"] <- extractBF(fit)$bf
cor_results[i,"phi"] <- phi(imp_m, digits = 3) # get phi coefficient for effect size
rm(imp_m, fit)
}
summary(cor_results[,"lh_wfr-Schulart"])## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1198 0.1420 0.1530 0.1509 0.1530 0.1976
# Distribution of the m BFs on a log10 axis; dashed lines mark 1/3 and 3.
ggplot(cor_results, aes(x = `lh_wfr-Schulart`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_wfr-Schulart`, y=""), height = 1, width = 0, alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4, size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4, size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))### effect size: phi #########
# In order to combine correlation estimates via Rubin's Rules we first
# have to apply Fisher-Transformation for Correlation to z-Score as
# correlations are not normally distributed and then transform the
# values back to correlation coefficients.
# apply Fisher transformation
cor_results_phi_z <- FisherZ(cor_results$phi)
# combine estimates
cor_results_phi_z <- mean(cor_results_phi_z)
# transform back to correlation coefficients
# NOTE(review): this numeric result shadows psych::phi(); later calls like
# phi(imp_m, ...) still resolve to the function (see note above at lh_erl).
phi <- FisherZInv(cor_results_phi_z)Effektstärke: \(\varphi\)= -0.042
Aufgrund fehlender Standards beim Pooling von Bayes-Faktoren (BF) berechnen wir für jeden Datensatz einen Bayes-Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One correlation BF (lh_wfr vs. Klassenstufe) per imputed dataset.
# NOTE(review): correlationBF() requires numeric input — assumes
# Klassenstufe is numeric in the imputed data; confirm.
kl_results <- data.frame()
for (i in 1:m) {
fit <- correlationBF(y = mice::complete(imp, i)$lh_wfr, # correlation
x = mice::complete(imp, i)$Klassenstufe)
kl_results[i,"lh_wfr-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(fit)
}
# show summary of BFs
summary(kl_results[,"lh_wfr-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1036 0.1076 0.1102 0.1106 0.1132 0.1267
# plot BFs distribution
ggplot(kl_results, aes(x = `lh_wfr-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_wfr-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-grade summary of lh_wfr: group mean plus group size (drives point
# size in the plot that follows).
# `n()` replaces the original mutate(length = length(lh_wfr)) /
# mean(length) detour, and `%in%` collapses five identical case_when()
# arms; the resulting data frame is unchanged.
lh_wfr_p <- p_data %>%
  dplyr::filter(!is.na(lh_wfr) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_wfr = mean(lh_wfr, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Descriptive plot: per-grade mean of lh_wfr; point size = number of lessons,
# shaded rectangles split grades 1-4 (Grundschule) from 5-12 (Gymnasium).
# NOTE(review): constant `vjust` inside aes() and row-wise geom_rect()
# (alpha accumulates over rows) kept as-is to preserve the rendered figure.
ggplot(lh_wfr_p, aes(x = Klassenstufe, y = lh_wfr, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = paste(round(lh_wfr*100,0), "%"), vjust = 3)) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One contingency-table BF (lh_wno x Schulart) per imputed dataset; `m`,
# `imp` and `cor_results` are created earlier in the document.
for (i in 1:m) {
imp_m <- matrix(table(mice::complete(imp, i)$lh_wno, mice::complete(imp, i)$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_wno-Schulart"] <- extractBF(fit)$bf
cor_results[i,"phi"] <- phi(imp_m, digits = 3) # get phi coefficient for effect size
rm(imp_m, fit)
}
summary(cor_results[,"lh_wno-Schulart"])## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.754e+13 5.581e+13 8.324e+13 9.702e+13 1.502e+14 1.502e+14
# Distribution of the m BFs on a log10 axis; dashed lines mark 1/3 and 3.
ggplot(cor_results, aes(x = `lh_wno-Schulart`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_wno-Schulart`, y=""), height = 1, width = 0, alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4, size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4, size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))### effect size: phi #########
# In order to combine correlation estimates via Rubin's Rules we first
# have to apply Fisher-Transformation for Correlation to z-Score as
# correlations are not normally distributed and then transform the
# values back to correlation coefficients.
# apply Fisher transformation
cor_results_phi_z <- FisherZ(cor_results$phi)
# combine estimates
cor_results_phi_z <- mean(cor_results_phi_z)
# transform back to correlation coefficients
# NOTE(review): this numeric result shadows psych::phi(); later calls like
# phi(imp_m, ...) still resolve to the function (see note above at lh_erl).
phi <- FisherZInv(cor_results_phi_z)Effektstärke: \(\varphi\)= -0.365
Aufgrund fehlender Standards beim Pooling von Bayes-Faktoren (BF) berechnen wir für jeden Datensatz einen Bayes-Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One correlation BF (lh_wno vs. Klassenstufe) per imputed dataset.
# NOTE(review): correlationBF() requires numeric input — assumes
# Klassenstufe is numeric in the imputed data; confirm.
kl_results <- data.frame()
for (i in 1:m) {
fit <- correlationBF(y = mice::complete(imp, i)$lh_wno, # correlation
x = mice::complete(imp, i)$Klassenstufe)
kl_results[i,"lh_wno-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(fit)
}
# show summary of BFs
summary(kl_results[,"lh_wno-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.011e+12 9.610e+12 2.044e+13 2.122e+13 3.116e+13 5.261e+13
# plot BFs distribution
ggplot(kl_results, aes(x = `lh_wno-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_wno-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-grade summary of lh_wno: group mean plus group size (drives point
# size in the plot that follows).
# `n()` replaces the original mutate(length = length(lh_wno)) /
# mean(length) detour, and `%in%` collapses five identical case_when()
# arms; the resulting data frame is unchanged.
lh_wno_p <- p_data %>%
  dplyr::filter(!is.na(lh_wno) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_wno = mean(lh_wno, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Descriptive plot: per-grade mean of lh_wno; point size = number of lessons,
# shaded rectangles split grades 1-4 (Grundschule) from 5-12 (Gymnasium).
# NOTE(review): constant `vjust` inside aes() and row-wise geom_rect()
# (alpha accumulates over rows) kept as-is to preserve the rendered figure.
ggplot(lh_wno_p, aes(x = Klassenstufe, y = lh_wno, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = paste(round(lh_wno*100,0), "%"), vjust = 3)) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One contingency-table BF (lh_ano x Schulart) per imputed dataset; `m`,
# `imp` and `cor_results` are created earlier in the document.
for (i in 1:m) {
imp_m <- matrix(table(mice::complete(imp, i)$lh_ano, mice::complete(imp, i)$Schulart), 2, 2)
fit <- contingencyTableBF(imp_m, sampleType="indepMulti",fixedMargin = "cols")
cor_results[i,"lh_ano-Schulart"] <- extractBF(fit)$bf
cor_results[i,"phi"] <- phi(imp_m, digits = 3) # get phi coefficient for effect size
rm(imp_m, fit)
}
summary(cor_results[,"lh_ano-Schulart"])## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.479e+14 4.433e+15 1.396e+16 1.711e+16 2.613e+16 4.685e+16
# Distribution of the m BFs on a log10 axis; dashed lines mark 1/3 and 3.
ggplot(cor_results, aes(x = `lh_ano-Schulart`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_ano-Schulart`, y=""), height = 1, width = 0, alpha = .3, size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4, size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4, size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))### effect size: phi #########
# In order to combine correlation estimates via Rubin's Rules we first
# have to apply Fisher-Transformation for Correlation to z-Score as
# correlations are not normally distributed and then transform the
# values back to correlation coefficients.
# apply Fisher transformation
cor_results_phi_z <- FisherZ(cor_results$phi)
# combine estimates
cor_results_phi_z <- mean(cor_results_phi_z)
# transform back to correlation coefficients
# NOTE(review): this numeric result shadows psych::phi(); later calls like
# phi(imp_m, ...) still resolve to the function (see note above at lh_erl).
phi <- FisherZInv(cor_results_phi_z)Effektstärke: \(\varphi\)= -0.382
Aufgrund fehlender Standards beim Pooling von Bayes-Faktoren (BF) berechnen wir für jeden Datensatz einen Bayes-Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One correlation BF (lh_ano vs. Klassenstufe) per imputed dataset.
# NOTE(review): correlationBF() requires numeric input — assumes
# Klassenstufe is numeric in the imputed data; confirm.
kl_results <- data.frame()
for (i in 1:m) {
fit <- correlationBF(y = mice::complete(imp, i)$lh_ano, # correlation
x = mice::complete(imp, i)$Klassenstufe)
kl_results[i,"lh_ano-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(fit)
}
# show summary of BFs
summary(kl_results[,"lh_ano-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.252e+13 2.947e+14 6.727e+14 9.168e+14 1.349e+15 5.043e+15
# plot BFs distribution
ggplot(kl_results, aes(x = `lh_ano-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_ano-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-grade summary of lh_ano: group mean plus group size (drives point
# size in the plot that follows).
# `n()` replaces the original mutate(length = length(lh_ano)) /
# mean(length) detour, and `%in%` collapses five identical case_when()
# arms; the resulting data frame is unchanged.
lh_ano_p <- p_data %>%
  dplyr::filter(!is.na(lh_ano) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_ano = mean(lh_ano, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Descriptive plot: per-grade mean of lh_ano; point size = number of lessons,
# shaded rectangles split grades 1-4 (Grundschule) from 5-12 (Gymnasium).
# NOTE(review): constant `vjust` inside aes() and row-wise geom_rect()
# (alpha accumulates over rows) kept as-is to preserve the rendered figure.
ggplot(lh_ano_p, aes(x = Klassenstufe, y = lh_ano, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = paste(round(lh_ano*100,0), "%"), vjust = 3)) +
scale_y_continuous(limits = c(0,1), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("% Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) #### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute vcov by hand ###
# bain() needs per-group means, their sampling variances and group sizes.
# Both means and variances are estimated in each of the m imputed datasets
# (here for lh_bfr by Schulart) and pooled by simple averaging.
# NOTE(review): averaging the within-imputation variances omits the
# between-imputation variance component of Rubin's rules — confirm this
# simplification is intended.
# create data frame to collect var and cov for each imputation
vcov_df <- data.frame(Grundschule = as.numeric(),
Gymnasium = as.numeric()
)
mean_df <- data.frame(Grundschule = as.numeric(),
Gymnasium = as.numeric()
)
# sample size per group
# NOTE(review): mice::complete(imp) returns the first imputed dataset;
# group sizes are constant across imputations only if Schulart had no
# missing values — confirm.
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for(i in 1:m) {
fit_lm <- lm(lh_bfr ~ Schulart-1, data = mice::complete(imp, i))
var_lm <- summary(fit_lm)$sigma**2 # get variance of the means (VOM)
vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # compute VOM per group
vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"] # compute VOM per group
# collect estimates of the means
mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# compute the mean over var
vcov_df <- vcov_df %>%
summarize_all(mean)
# create matrices (1x1 per group, as bain expects a list of matrices)
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## compute mean of estimates
bf_data <- mean_df %>%
summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# generating hypotheses (equal / smaller / larger group means)
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
hypothesis = hypotheses,
n = samp_gr, # size of the groups
Sigma = variances, # matrices of residual variances of groups
group_parameters = 1, # there is 1 group specific parameter (the mean in each group)
joint_parameters = 0 # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
)
print(bf_hyp)## Bayesian informative hypothesis testing for an object of class numeric:
##
## Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
## H1 2.523 0.145 1.000 1.000 2.523 0.145 17.345 0.897 0.853
## H2 1.000 1.000 0.749 0.500 0.749 0.500 2.979 0.077 0.074
## H3 1.000 1.000 0.251 0.500 0.251 0.500 0.336 0.026 0.025
## Hu 0.049
##
## Hypotheses:
## H1: Gym=Grund
## H2: Gym<Grund
## H3: Gym>Grund
##
## Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
## H1 H2 H3
## H1 1.00000000 11.5835903 34.510626
## H2 0.08632902 1.0000000 2.979269
## H3 0.02897658 0.3356529 1.000000
Aufgrund fehlender Standards beim Pooling von Bayes-Faktoren (BF) berechnen wir für jeden Datensatz einen Bayes-Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One correlation BF (lh_bfr vs. Klassenstufe) per imputed dataset.
# NOTE(review): correlationBF() requires numeric input — assumes
# Klassenstufe is numeric in the imputed data; confirm.
kl_results <- data.frame()
for (i in 1:m) {
fit <- correlationBF(y = mice::complete(imp, i)$lh_bfr, # correlation
x = mice::complete(imp, i)$Klassenstufe)
kl_results[i,"lh_bfr-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(fit)
}
# show summary of BFs
summary(kl_results[,"lh_bfr-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1036 0.1293 0.1512 0.1561 0.1770 0.3039
# plot BFs distribution
ggplot(kl_results, aes(x = `lh_bfr-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `lh_bfr-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-grade summary of lh_bfr: group mean plus group size (drives point
# size in the plot that follows).
# `n()` replaces the original mutate(length = length(lh_bfr)) /
# mean(length) detour, and `%in%` collapses five identical case_when()
# arms; the resulting data frame is unchanged.
lh_bfr_p <- p_data %>%
  dplyr::filter(!is.na(lh_bfr) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(lh_bfr = mean(lh_bfr, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Descriptive plot: per-grade mean of lh_bfr (absolute frequency, so the
# y-axis tops out at 1.1 * observed maximum rather than at 1).
# NOTE(review): constant `vjust` inside aes() and row-wise geom_rect()
# (alpha accumulates over rows) kept as-is to preserve the rendered figure.
ggplot(lh_bfr_p, aes(x = Klassenstufe, y = lh_bfr, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = round(lh_bfr, 2), vjust = 3)) +
scale_y_continuous(limits = c(0,(max(lh_bfr_p$lh_bfr)*1.1)), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("∅ absolute Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) #### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute vcov by hand ###
# bain() needs per-group means, their sampling variances and group sizes.
# Both means and variances are estimated in each of the m imputed datasets
# (here for sh_auf by Schulart) and pooled by simple averaging.
# NOTE(review): averaging the within-imputation variances omits the
# between-imputation variance component of Rubin's rules — confirm this
# simplification is intended.
# create data frame to collect var and cov for each imputation
vcov_df <- data.frame(Grundschule = as.numeric(),
Gymnasium = as.numeric()
)
mean_df <- data.frame(Grundschule = as.numeric(),
Gymnasium = as.numeric()
)
# sample size per group
# NOTE(review): mice::complete(imp) returns the first imputed dataset;
# group sizes are constant across imputations only if Schulart had no
# missing values — confirm.
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for(i in 1:m) {
fit_lm <- lm(sh_auf ~ Schulart-1, data = mice::complete(imp, i))
var_lm <- summary(fit_lm)$sigma**2 # get variance of the means (VOM)
vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # compute VOM per group
vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"] # compute VOM per group
# collect estimates of the means
mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# compute the mean over var
vcov_df <- vcov_df %>%
summarize_all(mean)
# create matrices (1x1 per group, as bain expects a list of matrices)
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## compute mean of estimates
bf_data <- mean_df %>%
summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# generating hypotheses (equal / smaller / larger group means)
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
hypothesis = hypotheses,
n = samp_gr, # size of the groups
Sigma = variances, # matrices of residual variances of groups
group_parameters = 1, # there is 1 group specific parameter (the mean in each group)
joint_parameters = 0 # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
)
print(bf_hyp)## Bayesian informative hypothesis testing for an object of class numeric:
##
## Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
## H1 0.000 0.199 1.000 1.000 0.000 0.199 0.001 0.001 0.000
## H2 1.000 1.000 1.000 0.500 1.000 0.500 176990.839 0.999 0.666
## H3 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
## Hu 0.333
##
## Hypotheses:
## H1: Gym=Grund
## H2: Gym<Grund
## H3: Gym>Grund
##
## Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
## H1 H2 H3
## H1 1.000000e+00 7.07254e-04 125.1775
## H2 1.413919e+03 1.00000e+00 176990.8373
## H3 7.988658e-03 5.65001e-06 1.0000
Aufgrund fehlender Standards beim Pooling von Bayes-Faktoren (BF) berechnen wir für jeden Datensatz einen Bayes-Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One correlation BF (sh_auf vs. Klassenstufe) per imputed dataset.
# NOTE(review): correlationBF() requires numeric input — assumes
# Klassenstufe is numeric in the imputed data; confirm.
kl_results <- data.frame()
for (i in 1:m) {
fit <- correlationBF(y = mice::complete(imp, i)$sh_auf, # correlation
x = mice::complete(imp, i)$Klassenstufe)
kl_results[i,"sh_auf-Klassenstufe"] <- extractBF(fit)$bf # save BF in data frame
rm(fit)
}
# show summary of BFs
summary(kl_results[,"sh_auf-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000e+00 1.440e+03 1.569e+04 1.176e+08 1.690e+05 6.877e+10
# plot BFs distribution
ggplot(kl_results, aes(x = `sh_auf-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `sh_auf-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-grade summary of sh_auf: group mean plus group size (drives point
# size in the plot that follows).
# `n()` replaces the original mutate(length = length(sh_auf)) /
# mean(length) detour, and `%in%` collapses five identical case_when()
# arms; the resulting data frame is unchanged.
sh_auf_p <- p_data %>%
  dplyr::filter(!is.na(sh_auf) & !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  summarize(sh_auf = mean(sh_auf, na.rm = TRUE),
            length_n = n()) %>%
  ungroup() %>%
  mutate(Klassenstufe = factor(Klassenstufe,
                               levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                          "7", "8", "9", "10", "11", "12")),
         Schulart = as.factor(case_when(
           Klassenstufe %in% c("1", "1_2", "2", "3", "4") ~ "Grundschule",
           TRUE ~ "Gymnasium"
         )))
# Descriptive plot: per-grade mean of sh_auf on a 1-5 Likert scale
# (y-limits fixed to the scale range).
# NOTE(review): constant `vjust` inside aes() and row-wise geom_rect()
# (alpha accumulates over rows) kept as-is to preserve the rendered figure.
ggplot(sh_auf_p, aes(x = Klassenstufe, y = sh_auf, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = round(sh_auf,1), vjust = -3)) +
scale_y_continuous(limits = c(1,5), expand = c(0,0)) +
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("Zustimmung (Likert Item)") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) #### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute vcov by hand ###
# bain() needs per-group means, their sampling variances and group sizes.
# Both means and variances are estimated in each of the m imputed datasets
# (here for sh_mel by Schulart) and pooled by simple averaging.
# NOTE(review): averaging the within-imputation variances omits the
# between-imputation variance component of Rubin's rules — confirm this
# simplification is intended.
# create data frame to collect var and cov for each imputation
vcov_df <- data.frame(Grundschule = as.numeric(),
Gymnasium = as.numeric()
)
mean_df <- data.frame(Grundschule = as.numeric(),
Gymnasium = as.numeric()
)
# sample size per group
# NOTE(review): mice::complete(imp) returns the first imputed dataset;
# group sizes are constant across imputations only if Schulart had no
# missing values — confirm.
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for(i in 1:m) {
fit_lm <- lm(sh_mel ~ Schulart-1, data = mice::complete(imp, i))
var_lm <- summary(fit_lm)$sigma**2 # get variance of the means (VOM)
vcov_df[i, "Grundschule"] <- var_lm/samp_gr["Grundschule"] # compute VOM per group
vcov_df[i, "Gymnasium"] <- var_lm/samp_gr["Gymnasium"] # compute VOM per group
# collect estimates of the means
mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# compute the mean over var
vcov_df <- vcov_df %>%
summarize_all(mean)
# create matrices (1x1 per group, as bain expects a list of matrices)
mat1 <- matrix(vcov_df$Grundschule, 1, 1)
mat2 <- matrix(vcov_df$Gymnasium, 1, 1)
variances <- list(mat1, mat2)
## compute mean of estimates
bf_data <- mean_df %>%
summarize_all(mean)
bf_data <- as.numeric(bf_data)
names(bf_data) <- c("Grund", "Gym")
## BAIN ##### #
# generating hypotheses (equal / smaller / larger group means)
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
hypothesis = hypotheses,
n = samp_gr, # size of the groups
Sigma = variances, # matrices of residual variances of groups
group_parameters = 1, # there is 1 group specific parameter (the mean in each group)
joint_parameters = 0 # there are no parameters that apply to each of the groups (e.g. the regression coefficient of a covariate)
)
print(bf_hyp)## Bayesian informative hypothesis testing for an object of class numeric:
##
## Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
## H1 3.695 0.170 1.000 1.000 3.695 0.170 21.714 0.916 0.879
## H2 1.000 1.000 0.504 0.500 0.504 0.500 1.018 0.043 0.041
## H3 1.000 1.000 0.496 0.500 0.496 0.500 0.983 0.042 0.040
## Hu 0.040
##
## Hypotheses:
## H1: Gym=Grund
## H2: Gym<Grund
## H3: Gym>Grund
##
## Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
## H1 H2 H3
## H1 1.00000000 21.5251188 21.906863
## H2 0.04645735 1.0000000 1.017735
## H3 0.04564779 0.9825742 1.000000
Aufgrund fehlender Standards beim Pooling von Bayes-Faktoren (BF) berechnen wir für jeden Datensatz einen Bayes-Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One Bayes factor per imputed data set (no accepted standard exists for
# pooling BFs, so their distribution is reported instead). vapply() replaces
# the original loop that grew an empty data.frame row by row.
bf_vec <- vapply(seq_len(m), function(i) {
  fit <- correlationBF(y = mice::complete(imp, i)$sh_mel, # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  extractBF(fit)$bf # extract the numeric BF
}, numeric(1))
# keep the original dash-containing column name used by the code below
kl_results <- data.frame(`sh_mel-Klassenstufe` = bf_vec, check.names = FALSE)
# show summary of BFs
summary(kl_results[,"sh_mel-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1036 0.1049 0.1091 0.1192 0.1209 0.6346
# plot BFs distribution
# Density of the m Bayes factors (one per imputed data set) with each BF also
# shown as a jittered point; the x-axis is log10-scaled.
ggplot(kl_results, aes(x = `sh_mel-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `sh_mel-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
# red dashed lines mark the conventional evidence thresholds BF = 1/3 and BF = 3
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
# minimal theme: no grid lines, light grey panel background
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-grade summary for the descriptive plot: mean of sh_mel and the number of
# observed lessons (length_n) per Klassenstufe.
sh_mel_p <- p_data %>%
  dplyr::filter(!is.na(sh_mel), !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  mutate(length = length(sh_mel)) %>% # group size, carried through summarize()
  summarize(sh_mel = mean(sh_mel, na.rm = TRUE),
            length_n = mean(length)) %>%
  ungroup() %>%
  mutate(
    # fix the plotting order of the grades on the x-axis
    Klassenstufe = factor(Klassenstufe,
                          levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                     "7", "8", "9", "10", "11", "12")),
    # grades 1-4 (incl. the mixed class "1_2") are Grundschule, the rest Gymnasium
    Schulart = as.factor(
      if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
              "Grundschule", "Gymnasium")
    )
  )
# Mean sh_mel per grade: point size = number of observed lessons, point colour =
# school type; background rectangles separate Grundschule (pink) from Gymnasium (blue).
ggplot(sh_mel_p, aes(x = Klassenstufe, y = sh_mel, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = round(sh_mel, 2), vjust = 3)) +
# start the y-axis at 0 and leave 10% headroom above the largest mean
scale_y_continuous(limits = c(0,(max(sh_mel_p$sh_mel)*1.1)), expand = c(0,0)) +
# shaded backgrounds: x up to 4.5 = Grundschule, above = Gymnasium
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("∅ absolute Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) #### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute vcov by hand ###
# Pre-allocate one row per imputation for the variance of the mean (VOM) and
# for the estimated group means (avoids growing the data frames inside the loop).
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
# sample size per group (group membership is observed, so sizes are the same
# in every completed data set)
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model (intercept suppressed): one coefficient per Schulart group
  fit_lm <- lm(sh_fra ~ Schulart - 1, data = mice::complete(imp, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance of the fit
  # variance of the mean (VOM) per group: sigma^2 / n_group
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"] <- var_lm / samp_gr["Gymnasium"]
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# pool the per-imputation variances of the means by averaging them
vcov_df <- summarize_all(vcov_df, mean)
# bain expects one (1x1) covariance matrix per group
mat1 <- matrix(vcov_df[["Grundschule"]], nrow = 1, ncol = 1)
mat2 <- matrix(vcov_df[["Gymnasium"]], nrow = 1, ncol = 1)
variances <- list(mat1, mat2)
## pool the group-mean estimates across imputations
bf_data <- summarize_all(mean_df, mean)
bf_data <- setNames(as.numeric(bf_data), c("Grund", "Gym"))
## BAIN ##### #
# the three competing informative hypotheses about the group means
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,          # size of the groups
               Sigma = variances,    # matrices of residual variances of groups
               group_parameters = 1, # one group-specific parameter (the mean in each group)
               joint_parameters = 0  # no parameters that apply to each of the groups
)
print(bf_hyp)## Bayesian informative hypothesis testing for an object of class numeric:
##
## Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
## H1 1.716 0.133 1.000 1.000 1.716 0.133 12.940 0.866 0.812
## H2 1.000 1.000 0.846 0.500 0.846 0.500 5.475 0.113 0.106
## H3 1.000 1.000 0.154 0.500 0.154 0.500 0.183 0.021 0.019
## Hu 0.063
##
## Hypotheses:
## H1: Gym=Grund
## H2: Gym<Grund
## H3: Gym>Grund
##
## Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
## H1 H2 H3
## H1 1.00000000 7.6514875 41.892639
## H2 0.13069354 1.0000000 5.475097
## H3 0.02387054 0.1826452 1.000000
Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One Bayes factor per imputed data set (no accepted standard exists for
# pooling BFs, so their distribution is reported instead). vapply() replaces
# the original loop that grew an empty data.frame row by row.
bf_vec <- vapply(seq_len(m), function(i) {
  fit <- correlationBF(y = mice::complete(imp, i)$sh_fra, # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  extractBF(fit)$bf # extract the numeric BF
}, numeric(1))
# keep the original dash-containing column name used by the code below
kl_results <- data.frame(`sh_fra-Klassenstufe` = bf_vec, check.names = FALSE)
# show summary of BFs
summary(kl_results[,"sh_fra-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1104 0.1962 0.2105 0.2105 0.2251 0.3108
# plot BFs distribution
# Density of the m Bayes factors (one per imputed data set) with each BF also
# shown as a jittered point; the x-axis is log10-scaled.
ggplot(kl_results, aes(x = `sh_fra-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `sh_fra-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
# red dashed lines mark the conventional evidence thresholds BF = 1/3 and BF = 3
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
# minimal theme: no grid lines, light grey panel background
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-grade summary for the descriptive plot: mean of sh_fra and the number of
# observed lessons (length_n) per Klassenstufe.
sh_fra_p <- p_data %>%
  dplyr::filter(!is.na(sh_fra), !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  mutate(length = length(sh_fra)) %>% # group size, carried through summarize()
  summarize(sh_fra = mean(sh_fra, na.rm = TRUE),
            length_n = mean(length)) %>%
  ungroup() %>%
  mutate(
    # fix the plotting order of the grades on the x-axis
    Klassenstufe = factor(Klassenstufe,
                          levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                     "7", "8", "9", "10", "11", "12")),
    # grades 1-4 (incl. the mixed class "1_2") are Grundschule, the rest Gymnasium
    Schulart = as.factor(
      if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
              "Grundschule", "Gymnasium")
    )
  )
# Mean sh_fra per grade: point size = number of observed lessons, point colour =
# school type; background rectangles separate Grundschule (pink) from Gymnasium (blue).
ggplot(sh_fra_p, aes(x = Klassenstufe, y = sh_fra, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = round(sh_fra, 2), vjust = 3)) +
# start the y-axis at 0 and leave 10% headroom above the largest mean
scale_y_continuous(limits = c(0,(max(sh_fra_p$sh_fra)*1.1)), expand = c(0,0)) +
# shaded backgrounds: x up to 4.5 = Grundschule, above = Gymnasium
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("∅ absolute Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) #### Compute BFs within informed hypotheses framework (bain) ##################################### #
### for bain: compute vcov by hand ###
# Pre-allocate one row per imputation for the variance of the mean (VOM) and
# for the estimated group means (avoids growing the data frames inside the loop).
vcov_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
mean_df <- data.frame(Grundschule = numeric(m),
                      Gymnasium = numeric(m))
# sample size per group (group membership is observed, so sizes are the same
# in every completed data set)
samp_gr <- table(mice::complete(imp)$Schulart)
## loop over all m imputations
for (i in seq_len(m)) {
  # cell-means model (intercept suppressed): one coefficient per Schulart group
  fit_lm <- lm(sh_not ~ Schulart - 1, data = mice::complete(imp, i))
  var_lm <- summary(fit_lm)$sigma^2 # residual variance of the fit
  # variance of the mean (VOM) per group: sigma^2 / n_group
  vcov_df[i, "Grundschule"] <- var_lm / samp_gr["Grundschule"]
  vcov_df[i, "Gymnasium"] <- var_lm / samp_gr["Gymnasium"]
  # collect estimates of the means
  mean_df[i, "Grundschule"] <- coef(fit_lm)["SchulartGrundschule"]
  mean_df[i, "Gymnasium"] <- coef(fit_lm)["SchulartGymnasium"]
  rm(fit_lm, var_lm) # clean up, because we like it tidy in here
}
## make var matrices
# pool the per-imputation variances of the means by averaging them
vcov_df <- summarize_all(vcov_df, mean)
# bain expects one (1x1) covariance matrix per group
mat1 <- matrix(vcov_df[["Grundschule"]], nrow = 1, ncol = 1)
mat2 <- matrix(vcov_df[["Gymnasium"]], nrow = 1, ncol = 1)
variances <- list(mat1, mat2)
## pool the group-mean estimates across imputations
bf_data <- summarize_all(mean_df, mean)
bf_data <- setNames(as.numeric(bf_data), c("Grund", "Gym"))
## BAIN ##### #
# the three competing informative hypotheses about the group means
hypotheses <- "Gym = Grund; Gym < Grund; Gym > Grund"
bf_hyp <- bain(bf_data,
               hypothesis = hypotheses,
               n = samp_gr,          # size of the groups
               Sigma = variances,    # matrices of residual variances of groups
               group_parameters = 1, # one group-specific parameter (the mean in each group)
               joint_parameters = 0  # no parameters that apply to each of the groups
)
print(bf_hyp)## Bayesian informative hypothesis testing for an object of class numeric:
##
## Fit_eq Com_eq Fit_in Com_in Fit Com BF PMPa PMPb
## H1 0.000 0.005 1.000 1.000 0.000 0.005 0.068 0.033 0.022
## H2 1.000 1.000 1.000 0.500 1.000 0.500 2905.398 0.967 0.652
## H3 1.000 1.000 0.000 0.500 0.000 0.500 0.000 0.000 0.000
## Hu 0.326
##
## Hypotheses:
## H1: Gym=Grund
## H2: Gym<Grund
## H3: Gym>Grund
##
## Note: BF denotes the Bayes factor of the hypothesis at hand versus its complement.
## H1 H2 H3
## H1 1.00000000 0.0342085490 99.38945
## H2 29.23245880 1.0000000000 2905.39808
## H3 0.01006143 0.0003441869 1.00000
Aufgrund fehlender Standards im pooling von Bayes Faktoren (BF), berechnen wir für jeden Datensatz einen Bayes Faktor und berichten anschließend deren Verteilung.
# Compute density of BFs
# One Bayes factor per imputed data set (no accepted standard exists for
# pooling BFs, so their distribution is reported instead). vapply() replaces
# the original loop that grew an empty data.frame row by row.
bf_vec <- vapply(seq_len(m), function(i) {
  fit <- correlationBF(y = mice::complete(imp, i)$sh_not, # correlation
                       x = mice::complete(imp, i)$Klassenstufe)
  extractBF(fit)$bf # extract the numeric BF
}, numeric(1))
# keep the original dash-containing column name used by the code below
kl_results <- data.frame(`sh_not-Klassenstufe` = bf_vec, check.names = FALSE)
# show summary of BFs
summary(kl_results[,"sh_not-Klassenstufe"]) ## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.362 5.455 8.044 9.964 12.732 44.463
# plot BFs distribution
# Density of the m Bayes factors (one per imputed data set) with each BF also
# shown as a jittered point; the x-axis is log10-scaled.
ggplot(kl_results, aes(x = `sh_not-Klassenstufe`)) +
geom_density(alpha = .3, fill = "#b4a069", color = NA) +
geom_jitter(aes(x = `sh_not-Klassenstufe`, y=""),
height = 1, width = 0, alpha = .3,
size = 2.5, fill = "#b4a069", color = "#b4a069") +
# red dashed lines mark the conventional evidence thresholds BF = 1/3 and BF = 3
geom_vline(xintercept = (1/3), color = "red", alpha = .4,
size = 2, linetype = "dashed") +
geom_vline(xintercept = 3, color = "red", alpha = .4,
size = 2, linetype = "dashed") +
scale_x_continuous(expand = c(0, 0), trans = 'log10') +
scale_y_discrete(expand = c(0, 0)) +
ylab("% Häufigkeit") +
xlab("BF [log10]") +
theme_light() +
# minimal theme: no grid lines, light grey panel background
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7"))###### DESCRIPTIVE PLOT ############################################### #
# Per-grade summary for the descriptive plot: mean of sh_not and the number of
# observed lessons (length_n) per Klassenstufe.
sh_not_p <- p_data %>%
  dplyr::filter(!is.na(sh_not), !is.na(Klassenstufe)) %>%
  group_by(Klassenstufe) %>%
  mutate(length = length(sh_not)) %>% # group size, carried through summarize()
  summarize(sh_not = mean(sh_not, na.rm = TRUE),
            length_n = mean(length)) %>%
  ungroup() %>%
  mutate(
    # fix the plotting order of the grades on the x-axis
    Klassenstufe = factor(Klassenstufe,
                          levels = c("1", "1_2", "2", "3", "4", "5", "6",
                                     "7", "8", "9", "10", "11", "12")),
    # grades 1-4 (incl. the mixed class "1_2") are Grundschule, the rest Gymnasium
    Schulart = as.factor(
      if_else(Klassenstufe %in% c("1", "1_2", "2", "3", "4"),
              "Grundschule", "Gymnasium")
    )
  )
# Mean sh_not per grade (labelled as a percentage): point size = number of
# observed lessons, point colour = school type; background rectangles separate
# Grundschule (pink) from Gymnasium (blue).
ggplot(sh_not_p, aes(x = Klassenstufe, y = sh_not, color = Schulart)) +
geom_line(aes(group = NA), color = "grey", size = 1) +
geom_point(aes(size = length_n)) +
geom_text(aes(label = paste(round(sh_not,0), "%"), vjust = 3)) +
# start the y-axis at 0 and leave 10% headroom above the largest mean
scale_y_continuous(limits = c(0,(max(sh_not_p$sh_not)*1.1)), expand = c(0,0)) +
# shaded backgrounds: x up to 4.5 = Grundschule, above = Gymnasium
geom_rect(aes(xmin = 0, xmax = 4.5, ymin = -Inf, ymax = Inf), fill = "pink", alpha = .01, color = NA) +
geom_rect(aes(xmin = 4.5, xmax = 14, ymin = -Inf, ymax = Inf), fill = "#BFEFFF", alpha = .01, color = NA) +
xlab("Klassenstufe") +
ylab("∅ absolute Häufigkeit") +
labs(size = "Anzahl\neingegangener\nStunden") +
theme_light() +
theme(axis.line = element_line(colour = "#696f71"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_rect(fill = "#f6f7f7")) In Abhängigkeit von der Kombination der Skalenniveaus beider Variablen, werden entsprechende Korrelationen berechnet.
Bei großen Stichproben approximiert Pearson’s \(\varphi\) die Pearson Produkt-Moment-Korrelation \(r\) und so werden die berechneten Werten für \(\varphi\) und \(r\) in ihrer Größe vergleichbar.
Computing all correlations using Pearson. Correlations between two dichotomous variables will be replaced later by Pearson’s \(\varphi\) (see below). Computing them first and replacing them later doesn’t make a hell of a lot of sense, but it was simply easier/quicker to compute all correlations (with the full data set) and then replace the dichotomous ones afterwards.
# Split the data by school type so correlations can be computed separately
# for Grundschule and Gymnasium.
p_dataGS <- p_data %>%
filter(Schulart == "Grundschule")
p_dataGY <- p_data %>%
filter(Schulart == "Gymnasium")
# Re-impute within each subsample with the same settings as the main imputation.
# NOTE(review): maxit, m, meth and pred are defined earlier in the document
# (not visible in this chunk) — presumably the global imputation setup.
impGS <- mice(p_dataGS,
maxit = maxit,
m = m,
meth = meth,
pred = pred,
seed = 666,
printFlag = F
)
impGY <- mice(p_dataGY,
maxit = maxit,
m = m,
meth = meth,
pred = pred,
seed = 666,
printFlag = F
)
# Pooled correlations of all observation variables, separately per school type.
corGS <- miceadds::micombine.cor(impGS, variables = c("lh_ank", "lh_auf", "lh_nen",
                                                      "lh_sch", "lh_erl", "lh_wfr",
                                                      "lh_wno", "lh_ano", "lh_bfr",
                                                      "sh_auf", "sh_mel", "sh_fra",
                                                      "sh_not"))
corGY <- miceadds::micombine.cor(impGY, variables = c("lh_ank", "lh_auf", "lh_nen",
                                                      "lh_sch", "lh_erl", "lh_wfr",
                                                      "lh_wno", "lh_ano", "lh_bfr",
                                                      "sh_auf", "sh_mel", "sh_fra",
                                                      "sh_not"))
# keep the first 78 rows (one per unordered pair of the 13 variables) and
# build a "var1-var2" key plus the rounded correlation per school type
corGS <- head(corGS, 78) %>%
  mutate(Korrelation = paste0(variable1, "-", variable2),
         rGS = round(r, digits = 3)) %>%
  dplyr::select(Korrelation, rGS)
corGY <- head(corGY, 78) %>%
  mutate(Korrelation = paste0(variable1, "-", variable2),
         rGY = round(r, digits = 3)) %>%
  dplyr::select(Korrelation, rGY)
corGSGY <- full_join(corGS, corGY, by = "Korrelation")Computing dichotomous correlations.
cor_var <- c("lh_ank", "lh_auf", "lh_nen", "lh_sch", "lh_erl", "lh_wfr", "lh_wno", "lh_ano")
# Helper: Pearson's phi for every pair of the dichotomous variables `vars`,
# one row per imputed data set, one column per variable pair ("var1-var2").
# Fixes two issues of the original twin loops:
#  - they iterated `for (n_i in 1:i)`, relying on `i` leaking from an earlier
#    loop (it happened to equal m); the number of imputations is now explicit
#  - the contingency table was built via eval(parse(text = ...)); plain
#    `[[` extraction does the same without string-built code
phi_pairs_by_imp <- function(imp_obj, vars, n_imp) {
  out <- data.frame()
  for (n_i in seq_len(n_imp)) { # over all imputations
    comp <- mice::complete(imp_obj, n_i) # completed data set n_i
    for (cor_1 in seq_len(length(vars) - 1)) { # all unordered pairs
      for (cor_2 in (cor_1 + 1):length(vars)) { # of dichotomous variables
        # 2x2 contingency table of the pair
        imp_m <- matrix(table(comp[[vars[cor_1]]], comp[[vars[cor_2]]]), 2, 2)
        # store phi in the collecting data frame
        out[n_i, paste0(vars[cor_1], "-", vars[cor_2])] <- phi(imp_m, digits = 3)
      }
    }
  }
  out
}
# Grundschule
cor_diGS <- phi_pairs_by_imp(impGS, cor_var, m)
# Gymnasium
cor_diGY <- phi_pairs_by_imp(impGY, cor_var, m)
In order to combine correlation estimates via Rubin’s Rules we first have to apply Fisher-Transformation for Correlation to z-Score as correlations are not normally distributed and then transform the values back to correlation coefficients. https://dx.doi.org/10.1186%2F1471-2288-9-57
# Fisher z-transform the phi coefficients before pooling (correlations are
# not normally distributed, so Rubin's rules are applied on the z scale)
cor_di_fisherGS <- FisherZ(cor_diGS)
cor_di_fisherGY <- FisherZ(cor_diGY)
# pool across imputations by averaging the z values
cor_di_fisherGS <- summarise_all(cor_di_fisherGS, mean)
cor_di_fisherGY <- summarise_all(cor_di_fisherGY, mean)
# back-transform the pooled z values to correlation coefficients
cor_diGS <- FisherZInv(cor_di_fisherGS)
cor_diGY <- FisherZInv(cor_di_fisherGY)
# long format (one row per variable pair) for joining with the Pearson r's
cor_diGS_l <- pivot_longer(cor_diGS, cols = everything(),
                           names_to = "Korrelation", values_to = "rGS")
cor_diGY_l <- pivot_longer(cor_diGY, cols = everything(),
                           names_to = "Korrelation", values_to = "rGY")
cor_di <- full_join(cor_diGS_l, cor_diGY_l, by = "Korrelation")
names_di <- cor_di$Korrelation
# drop the Pearson r's of the dichotomous pairs and splice in the phi values
corGSGY_nondi <- dplyr::filter(corGSGY, !Korrelation %in% names_di)
cor_values <- bind_rows(corGSGY_nondi, cor_di)
Bitte mit der Maus über die einzelnen Punkte fahren, um zu erfahren um welche Korrelation es sich handelt.
# Shared axis definition: both axes run from -.4 to 1 with ticks every .25.
axis_def <- list(range = c(-.4, 1),
dtick = 0.25,
automargin = TRUE)
# Interactive scatter of r_GS (x) vs r_GY (y). Two coloured background
# triangles indicate on which side of the diagonal a point lies, i.e. which
# school type shows the larger correlation.
plot_ly() %>%
# upper-left triangle (above the diagonal): correlation larger in Gymnasium
add_trace(
x = c(-.4,-.4,1),
y = c(-.4, 1, 1),
name = "Korrelation in GY größer",
type = 'scatter',
fill = 'toself',
fillcolor = '#BFEFFF',
opacity = .4,
hoveron = 'points',
marker = list(
color = '#BFEFFF',
opacity = 0),
line = list(
color = '#BFEFFF'),
text = "",
hoverinfo = ''
) %>%
# lower-right triangle (below the diagonal): correlation larger in Grundschule
add_trace(
x = c(-.4, 1,1),
y = c(-.4,-.4, 1),
name = "Korrelation in GS größer",
type = 'scatter',
fill = 'toself',
fillcolor = 'pink',
opacity = .4,
hoveron = 'points',
marker = list(
color = 'pink',
opacity = 0),
line = list(
color = 'pink'),
text = "",
hoverinfo = ''
) %>%
# one marker per variable pair; hover text shows the pair and both r values
add_trace(
data = cor_values,
x = ~rGS,
y = ~rGY,
type = "scatter",
mode = "markers",
marker = list(
color = "darkgrey",
opacity = .85
),
name = "Korrelationskoeffizient in GY und GS",
text = ~paste("Korrelation: ", Korrelation,
'<br>r<sub>GY</sub>= ', rGY,
'<br>r<sub>GS</sub>= ', rGS)) %>%
# square 800x800 layout with a horizontal legend above the plot
layout(
xaxis = axis_def,
yaxis = axis_def,
autosize = F,
width = 800,
height = 800,
legend = list(orientation = 'h',
x = 0,
y = 1))Genaue Werte der Korrelationen
| Korrelation | rGS | rGY |
|---|---|---|
| lh_ank-lh_bfr | 0.061 | 0.008 |
| lh_ank-sh_auf | 0.234 | 0.244 |
| lh_ank-sh_mel | 0.028 | 0.054 |
| lh_ank-sh_fra | 0.052 | 0.023 |
| lh_ank-sh_not | 0.158 | 0.150 |
| lh_auf-lh_bfr | 0.004 | 0.056 |
| lh_auf-sh_auf | 0.281 | 0.093 |
| lh_auf-sh_mel | -0.031 | 0.040 |
| lh_auf-sh_fra | -0.031 | 0.045 |
| lh_auf-sh_not | 0.077 | -0.006 |
| lh_nen-lh_bfr | 0.031 | -0.021 |
| lh_nen-sh_auf | 0.099 | 0.152 |
| lh_nen-sh_mel | 0.063 | -0.033 |
| lh_nen-sh_fra | 0.029 | -0.035 |
| lh_nen-sh_not | -0.012 | -0.046 |
| lh_sch-lh_bfr | -0.073 | 0.091 |
| lh_sch-sh_auf | 0.105 | 0.110 |
| lh_sch-sh_mel | 0.002 | -0.004 |
| lh_sch-sh_fra | -0.055 | 0.074 |
| lh_sch-sh_not | 0.544 | 0.585 |
| lh_erl-lh_bfr | 0.183 | 0.151 |
| lh_erl-sh_auf | 0.307 | 0.111 |
| lh_erl-sh_mel | 0.176 | 0.098 |
| lh_erl-sh_fra | 0.143 | 0.150 |
| lh_erl-sh_not | 0.095 | 0.143 |
| lh_wfr-lh_bfr | 0.155 | 0.078 |
| lh_wfr-sh_auf | 0.085 | 0.185 |
| lh_wfr-sh_mel | 0.324 | 0.089 |
| lh_wfr-sh_fra | 0.137 | 0.048 |
| lh_wfr-sh_not | 0.126 | 0.052 |
| lh_bfr-lh_wno | -0.071 | 0.133 |
| lh_bfr-lh_ano | -0.002 | 0.132 |
| lh_bfr-sh_auf | 0.074 | -0.001 |
| lh_bfr-sh_mel | 0.578 | 0.746 |
| lh_bfr-sh_fra | 0.937 | 0.918 |
| lh_bfr-sh_not | -0.069 | 0.100 |
| lh_wno-sh_auf | 0.070 | 0.120 |
| lh_wno-sh_mel | 0.050 | 0.008 |
| lh_wno-sh_fra | -0.044 | 0.122 |
| lh_wno-sh_not | 0.793 | 0.504 |
| lh_ano-sh_auf | 0.104 | 0.253 |
| lh_ano-sh_mel | 0.093 | 0.081 |
| lh_ano-sh_fra | 0.017 | 0.133 |
| lh_ano-sh_not | 0.702 | 0.440 |
| sh_auf-sh_mel | 0.059 | -0.051 |
| sh_auf-sh_fra | 0.084 | -0.001 |
| sh_auf-sh_not | 0.105 | 0.131 |
| sh_mel-sh_fra | 0.565 | 0.753 |
| sh_mel-sh_not | 0.053 | 0.005 |
| sh_fra-sh_not | -0.050 | 0.087 |
| lh_ank-lh_auf | 0.192 | 0.178 |
| lh_ank-lh_nen | -0.013 | 0.001 |
| lh_ank-lh_sch | 0.111 | 0.222 |
| lh_ank-lh_erl | 0.149 | 0.186 |
| lh_ank-lh_wfr | 0.102 | 0.114 |
| lh_ank-lh_wno | 0.181 | 0.108 |
| lh_ank-lh_ano | 0.133 | 0.126 |
| lh_auf-lh_nen | 0.113 | 0.161 |
| lh_auf-lh_sch | 0.045 | 0.111 |
| lh_auf-lh_erl | 0.144 | 0.154 |
| lh_auf-lh_wfr | 0.143 | 0.176 |
| lh_auf-lh_wno | 0.120 | 0.136 |
| lh_auf-lh_ano | 0.116 | 0.170 |
| lh_nen-lh_sch | 0.097 | -0.107 |
| lh_nen-lh_erl | 0.136 | 0.025 |
| lh_nen-lh_wfr | 0.031 | 0.011 |
| lh_nen-lh_wno | 0.057 | -0.046 |
| lh_nen-lh_ano | 0.056 | 0.004 |
| lh_sch-lh_erl | 0.113 | 0.148 |
| lh_sch-lh_wfr | 0.088 | 0.077 |
| lh_sch-lh_wno | 0.464 | 0.377 |
| lh_sch-lh_ano | 0.348 | 0.292 |
| lh_erl-lh_wfr | 0.210 | 0.143 |
| lh_erl-lh_wno | 0.030 | 0.235 |
| lh_erl-lh_ano | 0.136 | 0.208 |
| lh_wfr-lh_wno | 0.139 | 0.165 |
| lh_wfr-lh_ano | 0.150 | 0.189 |
| lh_wno-lh_ano | 0.637 | 0.556 |
Korrelationen des Zeitpunkts und Dauer der Hausaufgabenvergabe
über beide Schularten hinweg.
# Pooled correlation between start (beg_hw_min) and duration (dau_hw_min) of the
# homework assignment, separately for single (45 min) and double (90 min) lessons.
cor45_hw <- miceadds::micombine.cor(imp45, variables = c("beg_hw_min", "dau_hw_min"))
# use dplyr::select explicitly: library(MASS) is attached and masks select(),
# so the bare call only worked via a later re-export — make it unambiguous
cor45_hw <- cor45_hw %>%
dplyr::select(variable1, variable2, r)
cor90_hw <- miceadds::micombine.cor(imp90, variables = c("beg_hw_min", "dau_hw_min"))
cor90_hw <- cor90_hw %>%
dplyr::select(variable1, variable2, r)
# keep only the first row of each (each pair is listed twice by micombine.cor)
cor_hw <- bind_rows(cor45_hw[1,], cor90_hw[1,])
cor_hw$Stundenlaenge <- c("Einzelstunde", "Doppelstunde")
cor_hw <- cor_hw[,c("Stundenlaenge", "variable1", "variable2", "r")]
pander(cor_hw)| Stundenlaenge | variable1 | variable2 | r |
|---|---|---|---|
| Einzelstunde | beg_hw_min | dau_hw_min | -0.326 |
| Doppelstunde | beg_hw_min | dau_hw_min | -0.485 |
zur Vergleichbarkeit mit der Schulart, wurden die Effektmaße als Korrelation berechnet.
In Abhängigkeit von der Kombination der Skalenniveaus beider Variablen, werden entsprechende Korrelationen berechnet:
# Helper: pool the correlations of Klassenstufe with `vars` over the
# imputations in `imp_obj`, keep only the Klassenstufe rows, and round r.
# (Replaces three copies of the same filter/select/round pipeline.)
pool_klass_cor <- function(imp_obj, vars) {
  miceadds::micombine.cor(imp_obj, variables = c("Klassenstufe", vars)) %>%
    dplyr::filter(variable1 == "Klassenstufe") %>%
    dplyr::select(variable1, variable2, r) %>%
    mutate(r = round(r, digits = 3))
}
# correlation of Klassenstufe with all except time-related variables
eff_klass_a <- pool_klass_cor(imp, c("lh_ank", "lh_auf", "lh_nen",
                                     "lh_sch", "lh_erl", "lh_wfr",
                                     "lh_wno", "lh_ano", "lh_bfr",
                                     "sh_auf", "sh_mel", "sh_fra",
                                     "sh_not"))
# correlation of Klassenstufe with the time-related variables,
# separately for 45-minute and 90-minute lessons
eff_klass45 <- pool_klass_cor(imp45, c("beg_hw_min", "dau_hw_min"))
eff_klass90 <- pool_klass_cor(imp90, c("beg_hw_min", "dau_hw_min"))
# combine into one table (rendered with pander below)
eff_klass <- bind_rows(eff_klass_a, eff_klass45, eff_klass90)
pander(eff_klass)| variable1 | variable2 | r |
|---|---|---|
| Klassenstufe | lh_ank | -0.216 |
| Klassenstufe | lh_auf | -0.249 |
| Klassenstufe | lh_nen | 0.006 |
| Klassenstufe | lh_sch | -0.066 |
| Klassenstufe | lh_erl | -0.041 |
| Klassenstufe | lh_wfr | 0.015 |
| Klassenstufe | lh_bfr | -0.037 |
| Klassenstufe | lh_wno | -0.350 |
| Klassenstufe | lh_ano | -0.367 |
| Klassenstufe | sh_auf | -0.215 |
| Klassenstufe | sh_mel | 0.000 |
| Klassenstufe | sh_fra | -0.053 |
| Klassenstufe | sh_not | -0.131 |
| Klassenstufe | beg_hw_min | 0.215 |
| Klassenstufe | dau_hw_min | -0.286 |
| Klassenstufe | beg_hw_min | 0.318 |
| Klassenstufe | dau_hw_min | -0.357 |
Die Effektstärken der Schulart auf dichotome Variablen wurden bereits jeweils im Zuge der bayesianischen Hypothesentests (anhand des Pearson’s \(\varphi\)) berechnet, da hier ebenfalls Schleifen programmiert werden mussten. An dieser Stelle werden nun zusätzlich die Effektstärken auf die metrischen Variablen anhand der Punkt-Biserialen Korrelation \(r_{pb}\) nach Pearson berechnet. Die Variable “Schulart” wurde hierfür in eine numerische Variable umkodiert, wobei Grundschule = 0 und Gymnasium = 1.
# Point-biserial correlations of school type (Schulart_n, with
# Grundschule = 0 and Gymnasium = 1) with the metric variables.
# metric variables except the time-related ones
eff_schul_a <- miceadds::micombine.cor(imp,
                                       variables = c("Schulart_n", "lh_bfr", "sh_auf",
                                                     "sh_mel", "sh_fra", "sh_not")) %>%
  dplyr::filter(variable1 == "Schulart_n") %>% # keep only the Schulart rows
  dplyr::select(variable1, variable2, r) %>%   # drop the pooling details
  mutate(r = round(r, digits = 3))
# time-related variables, 45-minute lessons
eff_schul45 <- miceadds::micombine.cor(imp45,
                                       variables = c("Schulart_n",
                                                     "beg_hw_min",
                                                     "dau_hw_min")) %>%
  dplyr::filter(variable1 == "Schulart_n") %>%
  dplyr::select(variable1, variable2, r) %>%
  mutate(r = round(r, digits = 3))
# time-related variables, 90-minute lessons
eff_schul90 <- miceadds::micombine.cor(imp90,
                                       variables = c("Schulart_n",
                                                     "beg_hw_min",
                                                     "dau_hw_min")) %>%
  dplyr::filter(variable1 == "Schulart_n") %>%
  dplyr::select(variable1, variable2, r) %>%
  mutate(r = round(r, digits = 3))
# combine into one table (rendered with pander below)
eff_schular <- bind_rows(eff_schul_a, eff_schul45, eff_schul90)
pander(eff_schular)| variable1 | variable2 | r |
|---|---|---|
| Schulart_n | lh_bfr | -0.030 |
| Schulart_n | sh_auf | -0.191 |
| Schulart_n | sh_mel | -0.001 |
| Schulart_n | sh_fra | -0.045 |
| Schulart_n | sh_not | -0.149 |
| Schulart_n | beg_hw_min | 0.216 |
| Schulart_n | dau_hw_min | -0.252 |
| Schulart_n | beg_hw_min | 0.240 |
| Schulart_n | dau_hw_min | -0.386 |